In [20]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import math
import numpy as np

In [6]:
weather_los_angeles = pd.read_csv('../resources/dataset_weather/filtered_grouped_merged.csv')

In [18]:
def calculate_heat_index(temp, humidity):
    # Constants for the formula
    c1 = -42.379
    c2 = 2.04901523
    c3 = 10.14333127
    c4 = -0.22475541
    c5 = -0.00683783
    c6 = -0.05481717
    c7 = 0.00122874
    c8 = 0.00085282
    c9 = -0.00000199

    if temp >= 27:
        hi = (c1 + (c2 * temp) + (c3 * humidity) + (c4 * temp * humidity) +
              (c5 * temp ** 2) + (c6 * humidity ** 2) +
              (c7 * temp ** 2 * humidity) + (c8 * temp * humidity ** 2) +
              (c9 * temp ** 2 * humidity ** 2))
        return hi
    else:
        return temp

def calculate_wind_chill(temp, wind_speed):
    if temp <= 10 and wind_speed >= 4.8:
        wci = 13.12 + 0.6215 * temp - 11.37 * (wind_speed ** 0.16) + 0.3965 * temp * (wind_speed ** 0.16)
        return wci
    else:
        return temp

def calculate_dew_point(temp, humidity):
    a = 17.27
    b = 237.7
    alpha = ((a * temp)/(b + temp)) + math.log(humidity / 100.0)
    dew_point = (b * alpha) / (a - alpha)
    return dew_point

def calculate_humidex(temp, dew_point):
    e = 6.112 * math.exp((17.67 * (dew_point/100)) / ((dew_point/100) + 243.5))
    h = temp + 0.5555 * (e - 10.0)
    return h

def normalize_heat_index(hi):
    return (hi - 70) / (130 - 70) * 100

def normalize_wind_chill(wci):
    return (wci + 50) / (50 + 50) * 100

def normalize_humidex(h):
    return (h - 20) / (50 - 20) * 100

def normalize(value, min_value, max_value):
    return (value - min_value) / (max_value - min_value) * 100

def calculate_cwcs(temp, humidity, wind_speed):
    temp = temp - 273
    dew_point = calculate_dew_point(temp, humidity)
    
    hi = calculate_heat_index(temp, humidity)
    wci = calculate_wind_chill(temp, wind_speed)
    humidex = calculate_humidex(temp, dew_point)

    # Normalize the indices with improved bounds
    normalized_hi = max(0, min(100, normalize(hi, 70, 130)))
    normalized_wci = max(0, min(100, normalize(wci, -50, 50)))
    normalized_humidex = max(0, min(100, normalize(humidex, 20, 50)))

    # Determine weights based on temperature
    if temp > 25:
        weight_hi = 0.5
        weight_wci = 0.1
        weight_humidex = 0.4
    elif temp < 10:
        weight_hi = 0.1
        weight_wci = 0.8
        weight_humidex = 0.1
    else:
        weight_hi = 0.33
        weight_wci = 0.34
        weight_humidex = 0.33

    cwcs = (weight_hi * normalized_hi) + (weight_wci * normalized_wci) + (weight_humidex * normalized_humidex)
    return cwcs

# Assuming 'weather_los_angeles' is your DataFrame
def calculate_cwcs_for_row(row):
    return calculate_cwcs(row['Temp'], row['Humidity'], row['Wind_Speed_km'])

# Apply the function to each row
weather_los_angeles['weather_score'] = weather_los_angeles.apply(calculate_cwcs_for_row, axis=1)

# Display the DataFrame with the new 'weather_score' column
display(weather_los_angeles)

Unnamed: 0,datetime,Temp,Humidity,Wind_Speed_km,Temp_bins,Humidity_bins,Wind_Speed_bins,date,count,count_bins,weather_score
0,2016-03-22,288.370099,59.875000,33.666667,Low,Low,High,March,5,Low,22.225834
1,2016-03-23,289.984146,32.791667,16.416667,Medium,Low,High,March,68,High,22.774610
2,2016-03-24,290.782704,38.416667,6.791667,Medium,Low,Medium,March,68,High,23.046119
3,2016-03-25,290.866810,52.833333,8.229167,Medium,Low,Medium,March,45,Medium,23.074715
4,2016-03-26,290.318576,68.500000,11.541667,Medium,Medium,High,March,19,Low,22.888316
...,...,...,...,...,...,...,...,...,...,...,...
608,2017-12-07,297.004167,75.166667,14.791667,High,High,High,December,32,Low,27.242943
609,2017-12-08,296.494167,74.916667,4.583333,High,High,Low,December,38,Medium,26.507047
610,2017-12-09,297.546250,75.666667,11.437500,High,High,High,December,20,Low,28.025272
611,2017-12-10,292.112500,72.125000,4.645833,Medium,Medium,Low,December,33,Low,23.498250


In [56]:
print((min(weather_los_angeles['Temp'])+max(weather_los_angeles['Temp']))/2)

291.4929583333333


In [7]:
def norm(data):
    min_val = np.min(data)
    max_val = np.max(data)
    normalized_data = ((data - min_val) / (max_val - min_val))
    return normalized_data
def parabool(data):
    min_val = np.min(data)
    max_val = np.max(data)
    avg = (min_val + max_val)/2
    out_data = abs(data - avg)
    return norm(out_data)

In [8]:
weather_los_angeles['Wind_Speed_km'] = norm(weather_los_angeles['Wind_Speed_km'])
weather_los_angeles['Temp_norm'] = parabool(weather_los_angeles['Temp'])
weather_los_angeles['Humid_norm'] = norm(weather_los_angeles['Humidity'])
display(weather_los_angeles)

Unnamed: 0,datetime,Temp,Humidity,Wind_Speed_km,Temp_bins,Humidity_bins,Wind_Speed_bins,date,count,count_bins,Temp_norm,Humid_norm
0,2016-03-22,288.370099,59.875000,1.000000,Low,Low,High,March,5,Low,0.221727,0.547331
1,2016-03-23,289.984146,32.791667,0.471602,Medium,Low,High,March,68,High,0.106496,0.220040
2,2016-03-24,290.782704,38.416667,0.176771,Medium,Low,Medium,March,68,High,0.049484,0.288016
3,2016-03-25,290.866810,52.833333,0.220804,Medium,Low,Medium,March,45,Medium,0.043480,0.462236
4,2016-03-26,290.318576,68.500000,0.322272,Medium,Medium,High,March,19,Low,0.082620,0.651561
...,...,...,...,...,...,...,...,...,...,...,...,...
608,2017-12-07,297.004167,75.166667,0.421825,High,High,High,December,32,Low,0.392238,0.732125
609,2017-12-08,296.494167,74.916667,0.109126,High,High,Low,December,38,Medium,0.355828,0.729104
610,2017-12-09,297.546250,75.666667,0.319081,High,High,High,December,20,Low,0.430939,0.738167
611,2017-12-10,292.112500,72.125000,0.111040,Medium,Medium,Low,December,33,Low,0.043008,0.695368


In [9]:
weather_los_angeles['weather_score'] = weather_los_angeles['Wind_Speed_km'] + weather_los_angeles['Humid_norm'] + weather_los_angeles['Temp_norm']

In [10]:
weather_los_angeles.to_csv('weather_score_LA.csv', index=False)

In [21]:
fig = px.scatter(weather_los_angeles, x='weather_score', y='count', title='Scatter Plot with Regression Line')

# Show the plot
fig.show()