In [15]:
import pandas as pd 
import numpy as np
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, DateTime
from sqlalchemy.dialects.sqlite import DATETIME

In [16]:
weather_df = pd.read_csv("../Resources/weather_2022.csv")
weather_df['datetime'] = pd.to_datetime(weather_df['datetime'])
weather_df.head()



Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,solarenergy,uvindex,severerisk,sunrise,sunset,moonphase,conditions,description,icon,stations
0,"New York, NY",2022-01-01,56.3,50.0,52.8,56.3,50.0,52.8,50.4,91.6,...,1.3,1,,2022-01-01T07:20:13,2022-01-01T16:39:22,0.96,"Rain, Overcast",Cloudy skies throughout the day with rain.,rain,"72505394728,KLGA,F8726,KNYC,F1417,72503014732"
1,"New York, NY",2022-01-02,58.5,38.5,50.3,58.5,32.3,48.9,45.4,83.8,...,2.1,2,,2022-01-02T07:20:18,2022-01-02T16:40:13,0.0,"Rain, Overcast",Cloudy skies throughout the day with rain clea...,rain,"72505394728,KLGA,KNYC,F1417,72503014732"
2,"New York, NY",2022-01-03,37.9,23.5,30.1,31.8,12.5,20.8,13.3,49.9,...,1.1,1,,2022-01-03T07:20:21,2022-01-03T16:41:06,0.03,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,"72505394728,KLGA,KNYC,F1417,72503014732"
3,"New York, NY",2022-01-04,34.3,19.6,27.2,32.3,7.0,20.3,10.3,49.1,...,9.1,5,,2022-01-04T07:20:22,2022-01-04T16:42:00,0.06,Clear,Clear conditions throughout the day.,clear-day,"72505394728,KLGA,F8726,KNYC,F1417,72503014732"
4,"New York, NY",2022-01-05,46.6,31.1,39.5,44.9,25.3,35.7,32.7,77.0,...,2.1,1,,2022-01-05T07:20:20,2022-01-05T16:42:56,0.1,"Snow, Rain, Partially cloudy",Partly cloudy throughout the day with rain or ...,rain,"72505394728,KLGA,KNYC,F1417,72503014732"


In [17]:
weather_df.columns

Index(['name', 'datetime', 'tempmax', 'tempmin', 'temp', 'feelslikemax',
       'feelslikemin', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob',
       'precipcover', 'preciptype', 'snow', 'snowdepth', 'windgust',
       'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'sunrise',
       'sunset', 'moonphase', 'conditions', 'description', 'icon', 'stations'],
      dtype='object')

In [18]:
columns_to_drop = ['precipprob','name','feelslikemax','feelslikemin','feelslike','precipcover','stations','description','conditions','moonphase','sealevelpressure','cloudcover','snowdepth','winddir','solarradiation','solarenergy','dew','uvindex','windgust']
weather_df = weather_df.drop(columns_to_drop, axis = 1)

weather_df['sunrise'] = pd.to_datetime(weather_df['sunrise']).dt.strftime('%H:%M')
weather_df['sunset'] = pd.to_datetime(weather_df['sunset']).dt.strftime('%H:%M')
weather_df['severerisk'] = weather_df['severerisk'].fillna(0)
weather_df['preciptype'] = weather_df['preciptype'].fillna('dry')
weather_df['preciptype'] = np.where(weather_df['preciptype'].str.contains('rain', case=True),1,0) # 1 represents rain, 0 represents dry
weather_df['preciptype'] = np.where((weather_df['precip'] == 0) & (weather_df['preciptype'] != 'dry'),0,weather_df['preciptype'])
weather_df['snow'] = np.where(weather_df['snow']>0,1,0) # 1 represents snow, 0 represents no snow
weather_df['icon'] = np.where((weather_df['icon'] == 'partly-cloudy-day'),'cloudy',weather_df['icon'])

weather_df.head()


Unnamed: 0,datetime,tempmax,tempmin,temp,humidity,precip,preciptype,snow,windspeed,visibility,severerisk,sunrise,sunset,icon
0,2022-01-01,56.3,50.0,52.8,91.6,0.727,1,0,8.1,5.4,0.0,07:20,16:39,rain
1,2022-01-02,58.5,38.5,50.3,83.8,0.092,1,0,14.2,7.3,0.0,07:20,16:40,rain
2,2022-01-03,37.9,23.5,30.1,49.9,0.0,0,0,16.2,9.9,0.0,07:20,16:41,cloudy
3,2022-01-04,34.3,19.6,27.2,49.1,0.0,0,0,12.4,9.9,0.0,07:20,16:42,clear-day
4,2022-01-05,46.6,31.1,39.5,77.0,0.227,1,0,12.8,8.4,0.0,07:20,16:42,rain


In [19]:
#weather_df.groupby(weather_df['temp']).size()

temperature_bins = [-10, 20, 40, 60, 80, 90, 100]
temperature_labels = ['Freezing Cold', 'Cold', 'Moderate', 'Warm', 'Hot','Unbearable Hot']
precip_bins = [-0.01,0.0001,0.1,0.3,float('inf')]
precip_labels = ['Dry', 'Light', 'Moderate', 'Heavy']
humid_bins = [-float('inf'),50,80,float('inf')]
humid_labels = ['Low','High','Very High']
wind_bins = [4,7.1,12.1,18.1,24.1,31.1]
wind_labels = ['Flags ripple','Flags wave','Leaves scatter','Small trees sway','Umbrellas not working']

weather_df['temperature_category'] = pd.cut(weather_df['temp'], bins=temperature_bins, labels=temperature_labels)
weather_df['precip_category'] = pd.cut(weather_df['precip'], bins=precip_bins, labels=precip_labels)
weather_df['humid_category'] = pd.cut(weather_df['humidity'], bins=humid_bins, labels=humid_labels)
weather_df['wind_level'] = pd.cut(weather_df['windspeed'], bins=wind_bins, labels=wind_labels)

weather_df['temperature_category'] = weather_df['temperature_category'].astype('string')
weather_df['precip_category'] = weather_df['precip_category'].astype('string')
weather_df['humid_category'] = weather_df['humid_category'].astype('string')
weather_df['wind_level'] = weather_df['wind_level'].astype('string')



In [20]:
weather_df['temp'].describe()

count    365.000000
mean      56.580548
std       17.303147
min       11.100000
25%       41.800000
50%       56.900000
75%       71.500000
max       87.300000
Name: temp, dtype: float64

In [21]:
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   datetime              365 non-null    datetime64[ns]
 1   tempmax               365 non-null    float64       
 2   tempmin               365 non-null    float64       
 3   temp                  365 non-null    float64       
 4   humidity              365 non-null    float64       
 5   precip                365 non-null    float64       
 6   preciptype            365 non-null    int64         
 7   snow                  365 non-null    int64         
 8   windspeed             365 non-null    float64       
 9   visibility            365 non-null    float64       
 10  severerisk            365 non-null    float64       
 11  sunrise               365 non-null    object        
 12  sunset                365 non-null    object        
 13  icon                

In [22]:
weather_df.to_csv('../Resources/weather_classified_df.csv', index=False)

In [23]:
weather_df.groupby('temperature_category').size()

temperature_category
Cold              71
Freezing Cold      6
Hot               29
Moderate         127
Warm             132
dtype: int64

In [26]:
engine = create_engine('sqlite:///../Resources/collision_db.sqlite')

In [24]:
metadata = MetaData()

In [46]:
weather_info = Table('weather_2022_info', metadata, 
                     Column('datetime', DateTime, primary_key=True),
                     Column('tempmax', Float),
                     Column('tempmin', Float),
                     Column('temp', Float),
                     Column('humidity', Float),
                     Column('precip', Float),
                     Column('preciptype', Integer),
                     Column('snow', Integer),
                     Column('windspeed', Float),
                     Column('visibility', Float),
                     Column('severerisk', Float),
                     Column('sunrise', String),
                     Column('sunset', String),
                     Column('icon', String),
                     Column('temperature_category', String),
                     Column('precip_category', String),
                     Column('number_of_motorist_killed', Integer),
                     Column('humid_category', String),
                     Column('wind_level', String),
                     )
metadata.create_all(engine)


InvalidRequestError: Table 'weather_2022_info' is already defined for this MetaData instance.  Specify 'extend_existing=True' to redefine options and columns on an existing Table object.

In [50]:
weather_df.to_sql('weather_2022_info', con=engine, if_exists='replace', index=False)

365

In [51]:
from sqlalchemy import inspect

inspector = inspect(engine)
print(inspector.get_table_names())  # Check if the table is listed
print(inspector.get_columns('weather_2022_info'))  # Check the structure of the table


['motor_collisions', 'weather_2022_info']
[{'name': 'datetime', 'type': DATETIME(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'tempmax', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'tempmin', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'temp', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'humidity', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'precip', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'preciptype', 'type': BIGINT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'snow', 'type': BIGINT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'windspeed', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'visibility', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'severerisk', 'type': FLOAT(),