### Import Libraries

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Evaluate the "Spectral" palette. The return value is not stored or passed to
# any setter here, so this line by itself does not change plotting defaults;
# the plots that use the palette pass palette="Spectral" explicitly.
sns.color_palette("Spectral")


### Load Data

In [None]:
# Turn the Google Drive "view" link into a direct-download link by pulling the
# file id (second-to-last path segment) out of the share URL.
share_link = 'https://drive.google.com/file/d/1xGpWDasU_trDUkCeRDctpAxNfe8R7A5o/view?usp=sharing'
file_id = share_link.split('/')[-2]
url = 'https://drive.google.com/uc?id=' + file_id

# Read the CSV and discard the 'Unnamed: 0' column.
df = pd.read_csv(url).drop(columns=['Unnamed: 0'])

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Basic descriptive statistics (count, mean, std, min, quartiles, max) for the
# numerical features.
df.describe()

In [None]:
# Basic descriptive statistics for the categorical features: include='O'
# restricts the summary to object-dtype columns.
df.describe(include='O')

In [None]:
# Pairwise correlation of the numeric columns, rendered as a colour-graded
# table. numeric_only=True is needed because the frame also holds text columns
# (e.g. 'weather'); without it pandas >= 2.0 raises while trying to convert
# them to float.
df.corr(numeric_only=True).style.background_gradient(cmap="Spectral")

# Explore deeply



# 1-Single Features

## Categorical

### This graph shows the nine most frequent weather conditions in the data set.

In [None]:
# Count the rows per weather condition, most frequent first.
city_weather = df.groupby(['weather']).size().sort_values(ascending=False)

# Bar chart restricted to the first nine entries of that ranking.
top_conditions = city_weather[:9]
plt.figure(figsize=(15, 6))
sns.barplot(x=top_conditions.index, y=top_conditions, palette="Spectral")
plt.title('Most Frequent Weather Conditions')

### This plot shows the number of entries for each province.

In [None]:
# One bar per value of 'city'; bar height is the number of rows with that value.
fig, ax = plt.subplots(figsize=(15, 8))
sns.countplot(data=df, x="city", palette="Spectral", ax=ax)
ax.set(title='Provinces distribution in the dataset ')

### This graph shows the years for which the data was collected.

In [None]:
# One bar per value of 'year'; bar height is the number of rows with that value.
fig, ax = plt.subplots(figsize=(15, 8))
sns.countplot(data=df, x="year", palette="Spectral", ax=ax)
ax.set(title='Weather Records Per Year')

### This graph shows the months for which the data was collected.

In [None]:
# One bar per value of 'month'; bar height is the number of rows with that value.
fig, ax = plt.subplots(figsize=(15, 8))
sns.countplot(x=df["month"], ax=ax, palette="Spectral")
# The x axis is the month, so the title names months rather than years.
ax.set(title='Weather Records Per Month')

## Numerical


### This function plots the distribution of a given numerical column; it is applied to each numerical column in the data below

In [None]:
def distrubtion_mun(df, column):
    """Plot a histogram of one column of ``df`` and show it.

    Args:
        df: Data passed to ``sns.histplot`` as ``data``.
        column: Name of the column plotted on the x axis; also used as the
            x-axis label.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(8, 5))
    sns.histplot(data=df, x=column, color='#47A0B3')
    plt.xlabel(column, fontsize=20)  # x axis
    plt.ylabel('Counts', fontsize=20)  # y axis
    # No plt.legend() here: the histogram is a single unlabeled series, so a
    # legend would be empty and matplotlib would only log a
    # "No artists with labels found" warning.
    plt.tight_layout()
    plt.show()

In [None]:
# Columns whose distributions are drawn, one figure per column.
ls = [
    'temp',
    'wind',
    'humidity',
    'barometer',
    'visibility',
]
for col in ls:
    distrubtion_mun(df, col)

# 2-Multiple Features

In [None]:
 # this dummy df contains only the first three occuring weather conditions
# (.copy() gives each subset its own data, so a later column assignment on a
# subset does not trigger pandas' SettingWithCopyWarning.)
dummydf = df.loc[df['weather'].isin(['Clear', 'Sunny', 'Cloudy'])].copy()
# this dummy df contains only the second three occurring weather conditions
dummydf2 = df.loc[df['weather'].isin(['Sandstorm', 'Overcast', 'Rain'])].copy()
# this dummy df contains only the last three most occurring weather conditions
dummydf1 = df.loc[df['weather'].isin(['Fog', 'Haze', 'Duststorm'])].copy()

### This graph shows three weather conditions: clear, sunny and cloudy through the months of the year. 

In [None]:
# Grouped bars: how often each weather condition in dummydf occurs per month.
fig, ax = plt.subplots(figsize=(15, 8))
# Convert the months to strings for the x axis. assign() rebinds dummydf to a
# new frame instead of writing into a slice of df, which avoids
# SettingWithCopyWarning while leaving dummydf with the same string column.
dummydf = dummydf.assign(month=dummydf['month'].astype('str'))
sns.histplot(data=dummydf, x='month', hue='weather', multiple='dodge',
             legend=True, palette='Spectral', ax=ax)
# The x axis is the month, so the title names months rather than provinces.
ax.set_title('the frequency of the most occuring weather (Clear, Sunny, Cloudy ) conditions in each month')
plt.xticks(rotation=70)

### This graph shows three weather conditions: fog, haze and duststorm in each city. 

In [None]:
# Grouped bars of the dummydf1 subset: one group per city, one bar per
# weather condition.
fig, ax = plt.subplots(figsize=(15, 8))
sns.histplot(
    data=dummydf1,
    x='city',
    hue='weather',
    multiple='dodge',
    legend=True,
    palette='Spectral',
)
ax.set_title('the frequency of the most occuring weather (Fog, Haze, Duststorm) conditions in each province')
plt.xticks(rotation=70)


### This graph shows all weather condition with humidity values


In [None]:
# Bar chart of 'humidity' for each weather condition.
fig, ax = plt.subplots(figsize=(15, 8))
sns.barplot(data=df, y='humidity', x='weather', label='hum', ax=ax, palette="Spectral")
# The caption is set as the axes title; plt.show() does not take a title and
# rejects a positional argument in current matplotlib.
ax.set_title("Humidity values for Each weather condition ")
plt.show()

### This boxplot shows the distribution of temperatures in each city.

In [None]:
# Horizontal box plots of 'temp', one box per city.
plt.figure(figsize=(16, 8))
sns.boxplot(data=df, x='temp', y='city', palette="Spectral")
plt.xlabel('Temperatures in C')
plt.title('The Range of Temperatures for Each Province')
plt.show()

### This boxplot shows the distribution of wind speed in each city.

In [None]:
# Horizontal box plots of 'wind', one box per city.
plt.figure(figsize=(16, 8))
sns.boxplot(data=df, x='wind', y='city', palette="Spectral")
plt.xlabel('Wind')
plt.title('Range of Wind Speed Per Province')
plt.show()

### This boxplot shows the distribution of atmospheric pressure in each city.

In [None]:
# Horizontal box plots of 'barometer', one box per city.
plt.figure(figsize=(16, 10))
sns.boxplot(data=df, x='barometer', y='city', palette="Spectral")
plt.xlabel('Barometer')
plt.title('Range of Atmospheric Pressure Per Province')
plt.show()


### This boxplot shows the distribution of humidity percentage in each city.

In [None]:
# Horizontal box plots of 'humidity', one box per city.
plt.figure(figsize=(16, 8))
sns.boxplot(data=df, x='humidity', y='city', palette="Spectral")
plt.xlabel('Humidity %')
plt.title('Range of Humidity Dagree Per City')
plt.show()

### This graph shows the humidity percentage, wind speed and temperature through the hours of the day.


In [None]:
# Overlay humidity, temperature and wind speed against 'hour' on one axes.
# The metrics are drawn in this fixed order, one labelled line each.
fig = plt.figure(figsize=(10, 8))
hourly_metrics = (
    ('humidity', 'Humidity Percentage'),
    ('temp', 'Temperature In C'),
    ('wind', 'Wind Speed In km/h'),
)
for metric, legend_label in hourly_metrics:
    sns.lineplot(data=df, x='hour', y=metric, label=legend_label)

# Title, axis labels and tick rotation.
plt.title("hour Quarters and Weather Attributes")
plt.xlabel('hour and their Quarters', fontsize=20)
plt.ylabel('Value', fontsize=20)
plt.xticks(rotation=70)

plt.legend()
plt.tight_layout()
plt.show()

### This graph shows the humidity percentage, wind speed and temperature through all weather conditions.


In [None]:
# Overlay wind speed, humidity and visibility against the weather condition
# on a single axes, one labelled line per metric.
fig = plt.figure(figsize=(10, 8))
sns.lineplot(x='weather', y='wind', data=df, label='Wind Speed In km/h')
sns.lineplot(x='weather', y='humidity', data=df, label='Humidity %')
sns.lineplot(x='weather', y='visibility', data=df, label='Visiblity')

### This graph shows Distribution of humidity and atmospheric pressure through the months of year.


In [None]:
# Scatter of 'humidity' (x) against 'barometer' (y), points coloured by month.
fig = plt.figure(figsize=(10, 8))
sns.scatterplot(data=df, x='humidity', y='barometer', hue='month', palette='Spectral')

# Title, axis labels and tick rotation.
plt.title("Distribution of Humidity and Atmospheric pressure, through the months of year")
plt.xlabel('Humidity')
plt.ylabel('Barometer')
plt.xticks(rotation=70)

plt.legend()
plt.show()


### This graph shows Distribution of temperature through all weather conditions.


In [None]:
# Temperature per weather condition, points coloured by month.
# catplot is a figure-level function that creates its own figure, so the size
# is given through height/aspect; a preceding plt.figure(figsize=...) call
# would not size this plot and would only leave an extra empty figure behind.
sns.catplot(y='temp', x='weather', hue='month', data=df, height=20, aspect=1)
plt.xticks(rotation=70)
plt.show()

# 3-Multi-graphs

### This graph shows all weather conditions in each city. 

In [None]:
# Three vertically stacked panels, one per weather-condition subset, each
# showing per-city counts grouped by weather condition.
fig, ax = plt.subplots(3, 1, figsize=(15, 30))

# NOTE(review): the comments where the subsets are built describe dummydf2 as
# the "second three" and dummydf1 as the "last three" conditions, which is the
# opposite of the "less"/"least" wording in the titles of panels 2 and 3 --
# confirm which ordering is intended.
city_panels = (
    (ax[0], dummydf, 'the frequency of the most occuring weather conditions in each city'),
    (ax[1], dummydf1, 'the frequency of  less occuring weather conditions in each city'),
    (ax[2], dummydf2, 'the frequency of the least occuring weather conditions in each city'),
)
for panel, subset, panel_title in city_panels:
    sns.histplot(data=subset, x='city', hue='weather', ax=panel,
                 multiple='dodge', legend=True, palette='Spectral')
    panel.set_title(panel_title)

# super title
fig.suptitle("The frequency of weather conditions in each city")
# plt.show() instead of fig.show(): on a non-GUI backend fig.show() only emits
# a "cannot show the figure" warning.
plt.show()


### This graph shows the distributions of humidity percentage, wind speed, visibility and temperature for the three weather conditions clear, sunny and cloudy.


In [None]:
# 2x2 grid of polygon histograms over the dummydf subset: one panel per
# metric, split by weather condition.
fig, ax = plt.subplots(2, 2, figsize=(35, 20))

# (axes, column, panel title, x-axis label) for each panel.
metric_panels = (
    (ax[0, 0], 'temp',
     "Distribuion of temprature according to the weather condition", 'Temprature In C'),
    (ax[0, 1], 'wind',
     "Distribuion of Wind Speed according to the weather condition", 'Wind Speed in km/h'),
    (ax[1, 0], 'visibility',
     "Distribuion of visibility according to the weather condition", 'Visibility A'),
    (ax[1, 1], 'humidity',
     "Distribuion of Humidity according to the weather condition", 'Humidity %'),
)
for panel, metric, panel_title, x_label in metric_panels:
    sns.histplot(data=dummydf, x=metric, hue='weather', binwidth=2, element='poly', ax=panel)
    panel.set_title(panel_title, fontsize=25)
    panel.set_xlabel(x_label, fontsize=25)
    panel.set_ylabel('Frequency', fontsize=25)

fig.suptitle('Distrubtion of different weather metrics & attributes according to the most occuring weather conditions', fontsize=25)
# plt.show() instead of fig.show(): on a non-GUI backend fig.show() only emits
# a "cannot show the figure" warning.
plt.show()

### This graph shows the humidity percentage, wind speed, temperature and atmospheric pressure through the hours of day.

In [None]:
# Four subplots arranged in a square: one line plot per metric against 'hour'.
fig, ax = plt.subplots(2, 2, figsize=(15, 15))

# (axes, column, panel title, y-axis label) for each panel. Every panel gets
# its own 'Hour' x-label, including the bottom-right (humidity) one.
hourly_panels = (
    (ax[0, 0], 'temp',
     "Measurments of temprature according to the hour", 'Temprature In C'),
    (ax[0, 1], 'wind',
     "Measurments  of Wind Speed according to hour", 'Wind Speed in km/h'),
    (ax[1, 0], 'barometer',
     "Measurments of Barometer measurments according to the  hour", 'Barometer measurments in atm'),
    (ax[1, 1], 'humidity',
     "Measurments  of Humidity according to the hour", 'Humidity %'),
)
for panel, metric, panel_title, y_label in hourly_panels:
    sns.lineplot(data=df, y=metric, x='hour', ax=panel)
    panel.set_title(panel_title)
    panel.set_xlabel('Hour')
    panel.set_ylabel(y_label)

# super title
fig.suptitle('Meadurment of weather metrics & attributes according to the hour')
# plt.show() instead of fig.show(): on a non-GUI backend fig.show() only emits
# a "cannot show the figure" warning.
plt.show()