In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this line is suppressed for the rest of the session.
# NOTE(review): this presumably also hides deprecation warnings from the libraries
# used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Key Questions

1. What is the average temperature over the entire dataset?

2. What are the highest and lowest temperatures recorded?

3. How does the temperature vary over time (e.g., daily)?

4. How does the temperature vary over time (e.g., monthly)?

5. Is there a correlation between temperature and other weather parameters like humidity or wind speed?

6. How often does it rain or snow?

7. What is the distribution of wind speed values?

In [None]:
# Location of the raw weather CSV.
# NOTE(review): looks like a Google Drive path as mounted inside Colab — adjust when
# running anywhere else.
DATA_PATH = '/content/drive/MyDrive/Data/1. Weather Data.csv'

# Read the whole file into one DataFrame using pandas' default parsing options.
df = pd.read_csv(DATA_PATH)

In [None]:
# Display the loaded frame as the cell output to eyeball its columns and values.
df

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog
...,...,...,...,...,...,...,...,...
8779,12/31/2012 19:00,0.1,-2.7,81,30,9.7,100.13,Snow
8780,12/31/2012 20:00,0.2,-2.4,83,24,9.7,100.03,Snow
8781,12/31/2012 21:00,-0.5,-1.5,93,28,4.8,99.95,Snow
8782,12/31/2012 22:00,-0.2,-1.8,89,28,9.7,99.91,Snow


In [None]:
# Show the first five rows (the default row count for head()).
df.head()

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog


In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(8784, 8)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa
count,8784.0,8784.0,8784.0,8784.0,8784.0,8784.0
mean,8.798144,2.555294,67.431694,14.945469,27.664447,101.051623
std,11.687883,10.883072,16.918881,8.688696,12.622688,0.844005
min,-23.3,-28.5,18.0,0.0,0.2,97.52
25%,0.1,-5.9,56.0,9.0,24.1,100.56
50%,9.3,3.3,68.0,13.0,25.0,101.07
75%,18.8,11.8,81.0,20.0,25.0,101.59
max,33.0,24.4,100.0,83.0,48.3,103.65


In [None]:
# Print each column's name, non-null count and dtype, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8784 entries, 0 to 8783
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Date/Time         8784 non-null   object 
 1   Temp_C            8784 non-null   float64
 2   Dew Point Temp_C  8784 non-null   float64
 3   Rel Hum_%         8784 non-null   int64  
 4   Wind Speed_km/h   8784 non-null   int64  
 5   Visibility_km     8784 non-null   float64
 6   Press_kPa         8784 non-null   float64
 7   Weather           8784 non-null   object 
dtypes: float64(4), int64(2), object(2)
memory usage: 549.1+ KB


In [None]:
# Count the missing values in each column.
df.isna().sum()

Unnamed: 0,0
Date/Time,0
Temp_C,0
Dew Point Temp_C,0
Rel Hum_%,0
Wind Speed_km/h,0
Visibility_km,0
Press_kPa,0
Weather,0


# 1. What is the average temperature over the entire dataset?

In [None]:
# Question 1: arithmetic mean of every temperature reading, reported to two decimals.
average_temperature = df.loc[:, 'Temp_C'].mean()
print(
    f"The average temperature over the entire dataset is: {average_temperature:.2f}°C"
)

The average temperature over the entire dataset is: 8.80°C


In [None]:
# Histogram of all temperature readings (nbins=20).
fig = px.histogram(
    df,
    x="Temp_C",
    nbins=20,
    title="Distribution of Temperatures",
)
fig.update_layout(
    xaxis_title="Temperature (°C)",
    yaxis_title="Count",
)
fig.show()

# 2. What are the highest and lowest temperatures recorded?

In [None]:
# Question 2: the extremes of the temperature column, reported to two decimals.
highest_temp, lowest_temp = df['Temp_C'].max(), df['Temp_C'].min()

print(
    f"Highest Temperature Recorded: {highest_temp:.2f}°C",
    f"Lowest Temperature Recorded: {lowest_temp:.2f}°C",
    sep="\n",
)

Highest Temperature Recorded: 33.00°C
Lowest Temperature Recorded: -23.30°C


In [None]:
# Histogram of all temperature readings with the recorded extremes marked.
# Uses `highest_temp` and `lowest_temp`, which must already be defined in the notebook.
fig = px.histogram(df, x="Temp_C", title="Count Plot of Temperatures")

fig.update_layout(
    xaxis_title="Temperature (°C)",
    yaxis_title="Count",
    bargap=0.1)

# Annotate each dashed reference line so the figure can be read on its own:
# red marks the maximum, blue marks the minimum. The text is placed on the inner
# side of each line so it stays within the plot area.
fig.add_vline(
    x=highest_temp, line_width=3, line_dash="dash", line_color="red",
    annotation_text=f"Max: {highest_temp:.1f}°C", annotation_position="top left")
fig.add_vline(
    x=lowest_temp, line_width=3, line_dash="dash", line_color="blue",
    annotation_text=f"Min: {lowest_temp:.1f}°C", annotation_position="top right")

fig.show()

# 3. How does the temperature vary over time (e.g., daily)?

In [None]:
# Convert the timestamp strings to datetimes so the frame can be resampled by time.
# The format is spelled out (month/day/year hour:minute, as in "12/31/2012 19:00") so
# pandas does not have to guess between month-first and day-first, and so that a row
# that does not match raises an error instead of being parsed differently.
df['Date/Time'] = pd.to_datetime(df['Date/Time'], format='%m/%d/%Y %H:%M')

# Mean temperature for each calendar day; reset_index() turns the date index back
# into an ordinary 'Date/Time' column for plotting.
daily_avg_temp = df.resample('D', on='Date/Time')['Temp_C'].mean().reset_index()

# One bar per day.
fig = px.bar(daily_avg_temp, x='Date/Time', y='Temp_C', title='Daily Average Temperature')
fig.update_layout(xaxis_title='Date', yaxis_title='Temperature (°C)')
fig.show()


# 4. How does the temperature vary over time (e.g., monthly)?

In [None]:
# Mean temperature for each calendar month (requires 'Date/Time' to already be a
# datetime column).
# 'MS' (month start) groups rows into the same calendar-month bins as the previous 'M'
# alias but labels each bin with the first day of its month. That keeps every bar
# under its own month on a date axis, and avoids the 'M' alias, which newer pandas
# releases deprecate in favour of 'ME'.
monthly_avg_temp = (
    df.resample('MS', on='Date/Time')['Temp_C']
    .mean()
    .reset_index()
)

In [None]:
# Bar chart with one bar per row of `monthly_avg_temp`.
fig = px.bar(
    monthly_avg_temp,
    x='Date/Time',
    y='Temp_C',
    title='Monthly Average Temperature',
).update_layout(xaxis_title='Month', yaxis_title='Temperature (°C)')
fig.show()

# 5. Is there a correlation between temperature and other weather parameters like humidity or wind speed?

In [None]:
# Question 5: Pearson correlation (the Series.corr default, written out here) of
# temperature against relative humidity and against wind speed.
temp_humidity_corr = df['Temp_C'].corr(df['Rel Hum_%'], method='pearson')
temp_wind_corr = df['Temp_C'].corr(df['Wind Speed_km/h'], method='pearson')

print(
    f"Correlation between Temperature and Humidity: {temp_humidity_corr:.2f}",
    f"Correlation between Temperature and Wind Speed: {temp_wind_corr:.2f}",
    sep="\n",
)

Correlation between Temperature and Humidity: -0.22
Correlation between Temperature and Wind Speed: -0.06


In [None]:
# Donut chart (hole=0.5) with one slice per distinct value of the 'Weather' column.
fig = px.pie(
    df,
    names='Weather',
    title='Weather Condition Distribution',
    hole=0.5,
)

# Draw "percent + label" text inside the slices; with uniformtext_mode='hide',
# text that cannot be drawn at the minimum size of 12 is hidden.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(uniformtext_mode='hide', uniformtext_minsize=12)

fig.show()

# 6. How often does it rain or snow?

In [None]:
# Keep the rows whose 'Weather' text contains "rain" or "snow" in any letter case;
# rows with a missing 'Weather' value are treated as non-matching.
rain_snow_data = df.loc[
    df['Weather'].str.contains('Rain|Snow', na=False, case=False)
]

In [None]:
# Share of all rows that were selected into `rain_snow_data`, as a percentage.
total_observations = df.shape[0]
rain_snow_observations = rain_snow_data.shape[0]
rain_snow_frequency = rain_snow_observations / total_observations * 100

print(
    f"Rain or snow occurs approximately {rain_snow_frequency:.2f}% of the time."
)

Rain or snow occurs approximately 14.17% of the time.


In [None]:
# Count of rows for each distinct 'Weather' value.
fig = px.histogram(df, x="Weather", title="Count Plot of Weather Conditions")
fig.update_xaxes(title_text="Weather Condition")
fig.update_yaxes(title_text="Count")
fig.update_layout(bargap=0.1)  # spacing between adjacent bars
fig.show()

# 7. What is the distribution of wind speed values?

In [None]:
# Histogram of the wind-speed column (nbins=20).
fig = px.histogram(
    df,
    x="Wind Speed_km/h",
    nbins=20,
    title="Distribution of Wind Speeds",
).update_layout(
    xaxis_title="Wind Speed (km/h)",
    yaxis_title="Frequency",
)
fig.show()

# Conclusion

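In conclusion, this analysis of one year (2012) of hourly weather observations found an average temperature of 8.80°C, with readings ranging from -23.30°C to 33.00°C; weak negative correlations between temperature and humidity (-0.22) and between temperature and wind speed (-0.06); and rain or snow in about 14.17% of observations. More broadly, weather analysis provides critical insights into the patterns and trends shaping the Earth's climate. By examining temperature variations, precipitation patterns, wind speeds, and humidity levels, we gain a deeper understanding of the changing climate and its impacts on ecosystems and human activities. A single hourly series covering one year, such as this dataset, cannot by itself establish regional or global trends; trends such as the intensification of extreme weather events, shifts in seasonal patterns, and the growing unpredictability of weather systems are revealed only by data spanning many locations and years. Long-term temperature records, for instance, highlight the rise in global temperatures, which is contributing to phenomena like melting ice caps, more frequent heatwaves, and shifting agricultural zones.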

The frequency and intensity of rainfall and snowfall offer clues about changing precipitation patterns, while wind speed distributions help assess the strength of storms and other atmospheric phenomena. Furthermore, correlations between temperature, humidity, and wind speed provide a more comprehensive understanding of how these elements interact in the atmosphere.

Overall, global weather analysis serves as a vital tool for improving weather forecasting, managing climate-related risks, and informing policy decisions that aim to mitigate the impacts of climate change, ultimately guiding efforts to protect both people and the planet.







