In [None]:
# Requires folium. If it is missing, run this once in its own cell: %pip install folium


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import folium

# Locations of the two input CSV files.
# NOTE(review): these are absolute paths on one specific machine; point them
# at your own copies of the files before running the notebook elsewhere.
TEMPERATURE_CSV = r"C:\Users\shiby\Downloads\temperature.csv"
BINSIZE_CSV = r"C:\Users\shiby\Downloads\BinSize.csv"

# Read both files into DataFrames.
temperature_df = pd.read_csv(TEMPERATURE_CSV)
binsize_df = pd.read_csv(BINSIZE_CSV)

# Preview the first rows of the station table to see its columns.
print(binsize_df.head())


In [None]:
# Parse the 'Date' column into datetime values so the .dt accessors can be used.
temperature_df['Date'] = pd.to_datetime(temperature_df['Date'])

# Remove leap days (Feb 29) so every year contributes the same 365 calendar days.
# .copy() makes the filtered frame independent, so the column assignments below
# do not trigger SettingWithCopyWarning.
is_leap_day = (temperature_df['Date'].dt.month == 2) & (temperature_df['Date'].dt.day == 29)
temperature_df = temperature_df[~is_leap_day].copy()

# Extract the year and a leap-year-independent day of the year (1-365).
# In a leap year the raw dayofyear of every date after Feb 29 is one larger than
# for the same calendar date in a common year (Mar 1 is 61 instead of 60 and
# Dec 31 is 366 instead of 365). Subtracting that extra day keeps each calendar
# date on the same day_of_year in every year, which the per-day grouping needs.
after_leap_day = temperature_df['Date'].dt.is_leap_year & (temperature_df['Date'].dt.month > 2)
temperature_df['year'] = temperature_df['Date'].dt.year
temperature_df['day_of_year'] = temperature_df['Date'].dt.dayofyear - after_leap_day.astype(int)

# Separate the 2005-2014 reference period from 2015 and convert the readings
# from tenths of degrees C to degrees C. .assign returns new frames, so the
# 'value' column is never written onto a slice of temperature_df.
in_reference_period = temperature_df['year'].between(2005, 2014)
data_2005_2014 = temperature_df[in_reference_period].assign(
    value=lambda frame: frame['Data_Value'] / 10.0
)
data_2015 = temperature_df[temperature_df['year'] == 2015].assign(
    value=lambda frame: frame['Data_Value'] / 10.0
)


In [None]:
# Show the last five rows of the 2005-2014 frame to check the derived columns.
data_2005_2014.tail(n=5)

In [None]:
# Per-day records over 2005-2014: the largest TMAX reading and the smallest
# TMIN reading observed on each day of the year.
reference_tmax = data_2005_2014.loc[data_2005_2014['Element'] == 'TMAX']
reference_tmin = data_2005_2014.loc[data_2005_2014['Element'] == 'TMIN']

record_highs = reference_tmax.groupby('day_of_year')['value'].max()
record_lows = reference_tmin.groupby('day_of_year')['value'].min()


In [None]:
# Show the first five rows of the 2015 frame.
data_2015.head(n=5)

In [None]:
# Line graph of the 2005-2014 record highs and lows with the range between them shaded.

# Put both series on one explicit 1-365 day axis. The highs and lows come from
# two separate groupbys, so reindexing guarantees they line up day by day
# (a day missing from either series becomes NaN and leaves a gap in the plot).
plot_days = np.arange(1, 366)
highs_on_axis = record_highs.reindex(plot_days).values
lows_on_axis = record_lows.reindex(plot_days).values

plt.figure(figsize=(15, 10))
plt.plot(plot_days, highs_on_axis, label='Record High (2005-2014)', color='red')
plt.plot(plot_days, lows_on_axis, label='Record Low (2005-2014)', color='blue')
plt.fill_between(plot_days, lows_on_axis, highs_on_axis, color='grey', alpha=0.3)

# Customize the plot
plt.title('Record High and Low Temperatures (2005-2014) near Ann Arbor, Michigan')
plt.xlabel('Day of Year')
plt.ylabel('Temperature (°C)')
plt.legend()
plt.grid(True)

# Label the x-axis with month names at the true first day of each month in a
# 365-day year (1, 32, 60, ...). Evenly spaced ticks drift away from the real
# month boundaries because months differ in length.
month_starts = pd.date_range('2015-01-01', periods=12, freq='MS').dayofyear
plt.xticks(month_starts,
           ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

plt.show()



In [None]:
# Overlay the 2015 readings that broke the 2005-2014 records on the record band.

# Reference period: every reading from 2005 through 2014.
record_period = temperature_df[temperature_df['year'].between(2005, 2014)]

# Record high and low for each day of the year, in degrees C.
# Data_Value is stored in tenths of a degree, so the records are converted once
# here. This keeps record_highs/record_lows in the same unit as the earlier cell
# that defines the same names, so re-running that cell's plot after this one
# does not show values ten times too large.
record_highs = record_period[record_period['Element'] == 'TMAX'].groupby('day_of_year')['Data_Value'].max() / 10.0
record_lows = record_period[record_period['Element'] == 'TMIN'].groupby('day_of_year')['Data_Value'].min() / 10.0

# 2015 readings, with the same degrees-C 'value' column the earlier cell adds.
data_2015 = temperature_df[temperature_df['year'] == 2015].assign(
    value=lambda frame: frame['Data_Value'] / 10.0
)
highs_2015 = data_2015[data_2015['Element'] == 'TMAX']
lows_2015 = data_2015[data_2015['Element'] == 'TMIN']

# Keep only the 2015 readings beyond the record for their day. .map looks up
# each row's day_of_year in the record series; a day without a record maps to
# NaN, and comparisons with NaN are False, so such rows are dropped.
highs_2015_broken = highs_2015[highs_2015['value'] > highs_2015['day_of_year'].map(record_highs)]
lows_2015_broken = lows_2015[lows_2015['value'] < lows_2015['day_of_year'].map(record_lows)]

# Align both record series to one 1-365 day axis, once, for all plot calls.
days = np.arange(1, 366)  # Days of the year from 1 to 365
highs_on_axis = record_highs.reindex(days).values
lows_on_axis = record_lows.reindex(days).values

plt.figure(figsize=(15, 10))

# Record highs and lows, with the range between them shaded
plt.plot(days, highs_on_axis, label='Record Highs (2005-2014)', color='red')
plt.plot(days, lows_on_axis, label='Record Lows (2005-2014)', color='blue')
plt.fill_between(days, lows_on_axis, highs_on_axis, facecolor='grey', alpha=0.3)

# 2015 record-breaking readings
plt.scatter(highs_2015_broken['day_of_year'], highs_2015_broken['value'], color='red', label='2015 Record Breaking Highs')
plt.scatter(lows_2015_broken['day_of_year'], lows_2015_broken['value'], color='blue', label='2015 Record Breaking Lows')

# Formatting
plt.xlabel('Day of the Year')
plt.ylabel('Temperature (°C)')
plt.title('Record High and Low Temperatures (2005-2014) with 2015 Record Breakers')
plt.legend()
plt.grid(True)

# Month names at the true first day of each month in a 365-day year; evenly
# spaced ticks would drift away from the real month boundaries.
month_starts = pd.date_range('2015-01-01', periods=12, freq='MS').dayofyear
plt.xticks(month_starts,
           ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

plt.show()


# inference:

## From mid-February through March 2015, several days fell below the record lows set during the 10-year reference period (2005-2014), so February and March 2015 had colder days than any in that period.

## In December 2015, several days rose above the record highs set during the same 10-year reference period (2005-2014), so December 2015 had hotter days than any December in that period.


In [None]:
# Plot every weather station on an interactive map.

# One row per distinct (ID, LATITUDE, LONGITUDE) combination.
station_locations = binsize_df[['ID','LATITUDE','LONGITUDE']].drop_duplicates()

# Base map centered around Ann Arbor, Michigan
ann_arbor_map = folium.Map(location=[42.2808, -83.7430], zoom_start=10)

# One marker per station; clicking a marker shows the station ID.
station_columns = zip(
    station_locations['ID'],
    station_locations['LATITUDE'],
    station_locations['LONGITUDE'],
)
for station_id, latitude, longitude in station_columns:
    marker = folium.Marker(location=[latitude, longitude], popup=station_id)
    marker.add_to(ann_arbor_map)

# The map object is the cell's last expression, so the notebook renders it.
ann_arbor_map


In [None]:
import matplotlib.dates as mdates

# Bar chart of the average TMAX and TMIN reading for each month of 2015.

# Split the 2015 readings by element.
tmax_2015 = data_2015[data_2015['Element'] == 'TMAX']
tmin_2015 = data_2015[data_2015['Element'] == 'TMIN']

# Mean reading per calendar month. Only Data_Value is averaged: the frames also
# hold text columns, and averaging a whole frame raises TypeError on pandas 2.x,
# where non-numeric columns are no longer dropped silently. Grouping on the month
# number also avoids the deprecated 'M' resample alias. Reindexing to 1..12
# guarantees twelve rows in calendar order (NaN for a month without readings),
# so the heights always line up with the twelve bar positions below.
# The results are DataFrames indexed by month number with one 'Data_Value'
# column, still in tenths of degrees C.
month_numbers = list(range(1, 13))
monthly_tmax_2015 = (
    tmax_2015.groupby(tmax_2015['Date'].dt.month)[['Data_Value']]
    .mean()
    .reindex(month_numbers)
)
monthly_tmin_2015 = (
    tmin_2015.groupby(tmin_2015['Date'].dt.month)[['Data_Value']]
    .mean()
    .reindex(month_numbers)
)

# First day of each month of 2015: the anchor position for each pair of bars.
months = pd.date_range(start='2015-01-01', end='2015-12-31', freq='MS')

plt.figure(figsize=(12, 6))

# On a date axis the bar width is measured in days. Each pair is drawn side by
# side by shifting TMAX half a bar width left and TMIN half a bar width right.
bar_width = 10
offset = bar_width / 2
bar_shift = pd.Timedelta(days=offset)

# Heights are divided by 10 to convert tenths of degrees C to degrees C.
plt.bar(months - bar_shift, monthly_tmax_2015['Data_Value'].to_numpy() / 10, width=bar_width, label='TMAX', color='red', alpha=0.6)
plt.bar(months + bar_shift, monthly_tmin_2015['Data_Value'].to_numpy() / 10, width=bar_width, label='TMIN', color='blue', alpha=0.6)

# One tick at the start of each month, labelled with the abbreviated month name.
# plt.xticks fixes the tick positions, so no separate locator is needed.
plt.xticks(months)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b'))

plt.xlabel('Month')
plt.ylabel('Temperature (°C)')
plt.title('Average Monthly Temperatures in 2015 near Ann Arbor, Michigan')
plt.legend()
plt.grid(True)

plt.show()

 

# inference

## The graph above covers January to December 2015: average temperatures climb from negative values in winter to positive values, with the lowest average minimum temperature in February and the highest average maximum temperature in July.

In [None]:
# Box plots comparing the spread of the daily TMAX and TMIN readings in 2015.

# Convert the raw readings from tenths of degrees C to degrees C.
daily_tmax_c = tmax_2015['Data_Value']/10
daily_tmin_c = tmin_2015['Data_Value']/10

fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=[daily_tmax_c, daily_tmin_c], palette='coolwarm', ax=ax)

# The two boxes sit at x = 0 and x = 1, in the order the series were passed.
ax.set_xticks([0, 1])
ax.set_xticklabels(['TMAX', 'TMIN'])
ax.set_ylabel('Temperature (°C)')
ax.set_title('Distribution of Daily Temperatures in 2015 near Ann Arbor, Michigan')
ax.grid(True)
plt.show()


# inference

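## The daily maximum temperatures are concentrated between roughly 8 and 25 °C, with a central value of about 17 °C (the line inside a box plot marks the median rather than the mean).
## The daily minimum temperatures are concentrated between roughly -2 and 12 °C, with a central value of about 5 °C.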

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Grid of twelve panels (one per month) showing the weekly mean TMAX and TMIN for 2015.

# Weekly mean of the raw readings, using weeks that end on a Monday ('W-MON').
# Only Data_Value is averaged: the frames also hold text columns, and averaging
# a whole frame raises TypeError on pandas 2.x, where non-numeric columns are no
# longer dropped silently.
weekly_tmax_2015 = tmax_2015.resample('W-MON', on='Date')[['Data_Value']].mean().reset_index()
weekly_tmin_2015 = tmin_2015.resample('W-MON', on='Date')[['Data_Value']].mean().reset_index()

# Each weekly bin is stamped with the Monday that closes it, so the last days of
# December 2015 land in a bin stamped in early January 2016. Without a correction
# that bin would be drawn in the January panel; clamping the stamp to Dec 31
# keeps it in December.
last_day_of_2015 = pd.Timestamp('2015-12-31')
weekly_tmax_2015['Date'] = weekly_tmax_2015['Date'].clip(upper=last_day_of_2015)
weekly_tmin_2015['Date'] = weekly_tmin_2015['Date'].clip(upper=last_day_of_2015)

# 3 x 4 grid with shared axes so the months can be compared directly.
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(20, 15), sharex=True, sharey=True)
fig.suptitle('Weekly Average Maximum and Minimum Temperatures in 2015 near Ann Arbor, Michigan', fontsize=20)

# axes.flat walks the grid row by row, so panel k shows month k.
for month, ax in enumerate(axes.flat, start=1):
    month_tmax = weekly_tmax_2015[weekly_tmax_2015['Date'].dt.month == month]
    month_tmin = weekly_tmin_2015[weekly_tmin_2015['Date'].dt.month == month]

    # x is the day of the month of the weekly stamp; y is converted from
    # tenths of degrees C to degrees C.
    ax.scatter(month_tmax['Date'].dt.day, month_tmax['Data_Value']/10, color='red', label='TMAX', alpha=0.6)
    ax.scatter(month_tmin['Date'].dt.day, month_tmin['Data_Value']/10, color='blue', label='TMIN', alpha=0.6)

    # Full month name for the panel title.
    ax.set_title(pd.Timestamp(year=2015, month=month, day=1).strftime('%B'))
    ax.set_xlabel('Day')
    ax.set_ylabel('Temperature (°C)')
    ax.grid(True)

# Leave room for the figure title, then add one legend for the whole figure.
# Handles and labels are taken from the first panel so each label is tied to
# its own artist instead of relying on the order artists were created.
plt.tight_layout(rect=[0, 0, 1, 0.95])
legend_handles, legend_labels = axes[0, 0].get_legend_handles_labels()
fig.legend(legend_handles, legend_labels, loc='upper right')
plt.show()


# inference:

## June, July, August and September are the hottest months, with the weekly average minimum and maximum temperatures consistently above 10 °C and 20 °C respectively.
## January and February are the coldest months; February is the coldest of all, with TMIN consistently below -10 °C and TMAX below 0 °C.


In [None]:
# Histogram of the daily TMIN readings in 2015 with the fullest bin annotated.
plt.figure(figsize=(12, 6))

# plt.hist returns the count per bin (n) and the 31 bin edges (bins);
# n[i] counts the readings between bins[i] and bins[i + 1].
# The readings are divided by 10 to convert tenths of degrees C to degrees C.
n, bins, patches = plt.hist(tmin_2015['Data_Value']/10, bins=30, alpha=0.5, color='blue', label='TMIN')
max_freq = n.max()
fullest = n.argmax()
max_bin = bins[fullest]            # left edge of the fullest bin
max_bin_right = bins[fullest + 1]  # right edge of the fullest bin

# Label the most frequent bin. The text is centred over the middle of the bar
# (not over its left edge) and shows the rounded temperature range of the bin.
plt.text((max_bin + max_bin_right) / 2, max_freq,
         f'Max bin\n{max_bin:.1f} to {max_bin_right:.1f} °C',
         ha='center', va='bottom', color='black')

# Add labels and title
plt.xlabel('Temperature (°C)')
plt.ylabel('Frequency')
plt.title('Histogram of  Minimum Daily Temperatures in 2015')
plt.legend()
plt.show()


# inference:

## The most frequent minimum daily temperatures in 2015 (a count above 600) fall in the histogram bin that starts at about 12.66 °C.

In [None]:
# Histogram of the daily TMAX readings in 2015 with the fullest bin annotated.
plt.figure(figsize=(12, 6))

# plt.hist returns the count per bin (n) and the 31 bin edges (bins);
# n[i] counts the readings between bins[i] and bins[i + 1].
# The readings are divided by 10 to convert tenths of degrees C to degrees C.
n, bins, patches = plt.hist(tmax_2015['Data_Value']/10, bins=30, alpha=0.5, color='red', label='TMAX')
max_freq = n.max()
fullest = n.argmax()
max_bin = bins[fullest]            # left edge of the fullest bin
max_bin_right = bins[fullest + 1]  # right edge of the fullest bin

# Label the most frequent bin. The text is centred over the middle of the bar
# (not over its left edge) and shows the rounded temperature range of the bin.
plt.text((max_bin + max_bin_right) / 2, max_freq,
         f'Max bin\n{max_bin:.1f} to {max_bin_right:.1f} °C',
         ha='center', va='bottom', color='black')

# Add labels and title
plt.xlabel('Temperature (°C)')
plt.ylabel('Frequency')
plt.title('Histogram of Maximum Daily Temperatures in 2015')
plt.legend()
plt.show()

# inference:

## The most frequent maximum daily temperatures in 2015 (a count above 580) fall in the histogram bin that starts at about 25.66 °C.