<a href="https://colab.research.google.com/github/REHAB199/Saudi-Arabia-Weather-Deep-learning/blob/main/Code/Saudi_Arabia_Weather_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 ## Import the libraries

 ---

In [1]:
# !pip install pandas==0.23.4

In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import rcParams

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import warnings
warnings.filterwarnings('ignore')

import scipy.stats

import pandas_profiling

## Loading the data

---

In [3]:
df = pd.read_csv('/content/weather-sa-2017-2019-clean.csv')
df.shape

(249023, 15)

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,city,date,time,year,month,day,hour,minute,weather,temp,wind,humidity,barometer,visibility
0,0,Qassim,1 January 2017,00:00,2017,1,1,24,0,Clear,17,11,64%,1018.0,16
1,1,Qassim,1 January 2017,01:00,2017,1,1,1,0,Clear,17,6,64%,1018.0,16
2,2,Qassim,1 January 2017,03:00,2017,1,1,3,0,Clear,15,11,72%,1019.0,16
3,3,Qassim,1 January 2017,04:00,2017,1,1,4,0,Clear,15,11,72%,1019.0,16
4,4,Qassim,1 January 2017,05:00,2017,1,1,5,0,Clear,15,9,72%,1019.0,16


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249023 entries, 0 to 249022
Data columns (total 15 columns):
Unnamed: 0    249023 non-null int64
city          249023 non-null object
date          249023 non-null object
time          249023 non-null object
year          249023 non-null int64
month         249023 non-null int64
day           249023 non-null int64
hour          249023 non-null int64
minute        249023 non-null int64
weather       249023 non-null object
temp          249023 non-null int64
wind          249023 non-null int64
humidity      249006 non-null object
barometer     248951 non-null float64
visibility    249023 non-null int64
dtypes: float64(1), int64(9), object(5)
memory usage: 28.5+ MB


## data cleaning

---

In [6]:
# convert date to datetime type
df['date'] = pd.to_datetime(df['date'])

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249023 entries, 0 to 249022
Data columns (total 15 columns):
Unnamed: 0    249023 non-null int64
city          249023 non-null object
date          249023 non-null datetime64[ns]
time          249023 non-null object
year          249023 non-null int64
month         249023 non-null int64
day           249023 non-null int64
hour          249023 non-null int64
minute        249023 non-null int64
weather       249023 non-null object
temp          249023 non-null int64
wind          249023 non-null int64
humidity      249006 non-null object
barometer     248951 non-null float64
visibility    249023 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(9), object(4)
memory usage: 28.5+ MB


In [8]:
# Unnamed: 0 is irrlevent 
delete_col =['Unnamed: 0']
df.drop(delete_col, axis =1 , inplace =True)

In [9]:
# convert feature humidity to Numerical
df['humidity'] = df['humidity'].str.replace('%','').astype('float')

In [10]:
# Check duplicate 
duplicate = df.index.duplicated()
print('Number of duplicate =',duplicate.sum())

Number of duplicate = 0


In [11]:
df.isnull().sum()

city           0
date           0
time           0
year           0
month          0
day            0
hour           0
minute         0
weather        0
temp           0
wind           0
humidity      17
barometer     72
visibility     0
dtype: int64

Dealing with missing values

In [12]:
# now filling the missing values with means for each city
df['humidity'] = df['humidity'].fillna(df.groupby('city')['humidity'].transform('mean'))

In [13]:
df['barometer'] = df['barometer'].fillna(df.groupby('city')['barometer'].transform('mean'))

In [14]:
df.isnull().sum()

city          0
date          0
time          0
year          0
month         0
day           0
hour          0
minute        0
weather       0
temp          0
wind          0
humidity      0
barometer     0
visibility    0
dtype: int64

In [15]:
df.describe()

Unnamed: 0,year,month,day,hour,minute,temp,wind,humidity,barometer,visibility
count,249023.0,249023.0,249023.0,249023.0,249023.0,249023.0,249023.0,249023.0,249023.0,249023.0
mean,2017.710007,6.050694,15.691081,12.53689,0.131108,24.722624,12.957104,37.553192,1015.454041,11.053453
std,0.706113,3.521591,8.787958,6.910254,1.97071,8.880913,8.711619,23.592329,6.970366,7.053005
min,2017.0,1.0,1.0,1.0,0.0,-4.0,-1.0,0.0,904.0,-1.0
25%,2017.0,3.0,8.0,7.0,0.0,18.0,7.0,17.0,1011.0,5.0
50%,2018.0,6.0,16.0,13.0,0.0,24.0,11.0,32.0,1016.0,16.0
75%,2018.0,9.0,23.0,19.0,0.0,31.0,19.0,55.0,1021.0,16.0
max,2019.0,12.0,31.0,24.0,59.0,50.0,163.0,100.0,1101.0,161.0


In [16]:
df['weather'].unique()

array(['Clear ', 'Sunny ', 'Scattered clouds ', 'Partly sunny ',
       'Passing clouds ', 'Refreshingly cool ', 'Low level haze ',
       'Duststorm ', 'Thunderstorms  Passing clouds ', 'Fog ',
       'Thunderstorms  Partly sunny ', 'Light rain  Partly sunny ',
       'Dense fog ', 'Thunderstorms  Scattered clouds ',
       'Rain  Passing clouds ', 'Extremely hot ', 'Rain  Partly sunny ',
       'Pleasantly warm ', 'Hot ', 'Mild ', 'Overcast ',
       'Rain  Overcast ', 'Smoke ', 'Thunderstorms  Broken clouds ',
       'Heavy rain  Partly sunny ', 'Thunderstorms  Overcast ',
       'Light rain  Overcast ', 'Warm ', 'Thunderstorms  Cloudy ',
       'Drizzle  Overcast ', 'Thunderstorms  Partly cloudy ',
       'Broken clouds ', 'Sandstorm ', 'Partly cloudy ', 'Mostly cloudy ',
       'Rain  Partly cloudy ', 'Rain  Broken clouds ',
       'Rain  Scattered clouds ', 'Haze ', 'Rain  Mostly cloudy ',
       'Hail  Partly sunny ', 'Thundershowers  Passing clouds ',
       'Thunderstorms  Mor

In [17]:
df.weather = df.weather.str.strip()
df.weather = df.weather.replace('\s+', ' ', regex=True)
df.weather = df.weather.str.title()
df.weather.unique()

array(['Clear', 'Sunny', 'Scattered Clouds', 'Partly Sunny',
       'Passing Clouds', 'Refreshingly Cool', 'Low Level Haze',
       'Duststorm', 'Thunderstorms Passing Clouds', 'Fog',
       'Thunderstorms Partly Sunny', 'Light Rain Partly Sunny',
       'Dense Fog', 'Thunderstorms Scattered Clouds',
       'Rain Passing Clouds', 'Extremely Hot', 'Rain Partly Sunny',
       'Pleasantly Warm', 'Hot', 'Mild', 'Overcast', 'Rain Overcast',
       'Smoke', 'Thunderstorms Broken Clouds', 'Heavy Rain Partly Sunny',
       'Thunderstorms Overcast', 'Light Rain Overcast', 'Warm',
       'Thunderstorms Cloudy', 'Drizzle Overcast',
       'Thunderstorms Partly Cloudy', 'Broken Clouds', 'Sandstorm',
       'Partly Cloudy', 'Mostly Cloudy', 'Rain Partly Cloudy',
       'Rain Broken Clouds', 'Rain Scattered Clouds', 'Haze',
       'Rain Mostly Cloudy', 'Hail Partly Sunny',
       'Thundershowers Passing Clouds',
       'Thunderstorms More Clouds Than Sun', 'More Clouds Than Sun',
       'Light Rain 

In [18]:
df.city.unique()

array(['Qassim', 'Hail', 'Madina', 'EP', 'Riyadh', 'Mecca', 'Tabuk',
       'Assir', 'Northern boarder', 'Jazan', 'Najran', 'Baha', 'Jawf'],
      dtype=object)

In [19]:
df.city.nunique()

13

In [20]:
df.city.replace({'EP':'Eastern Province'}, inplace=True)

In [21]:
df.city.unique()

array(['Qassim', 'Hail', 'Madina', 'Eastern Province', 'Riyadh', 'Mecca',
       'Tabuk', 'Assir', 'Northern boarder', 'Jazan', 'Najran', 'Baha',
       'Jawf'], dtype=object)

In [22]:
s = df.groupby(['city'])['temp'].mean().sort_values(ascending = False) #.nlargest(5)
s

city
Mecca               29.023239
Madina              28.708740
Riyadh              27.957981
Eastern Province    27.215077
Qassim              25.961350
Hail                25.927837
Northern boarder    24.978799
Baha                23.468110
Jawf                23.101268
Tabuk               22.638735
Jazan               20.711735
Najran              20.692951
Assir               20.257414
Name: temp, dtype: float64

In [23]:
df2 = (df[df['city'].isin(s.index)]
               .groupby(['city', 'weather'])['temp']
               .size()
               .unstack(fill_value=0)
               .add_suffix(' days')
               .reindex(s.index)
               .reset_index()
               .rename_axis(None, axis=1))

df2.insert(1, 'temp avg', df2['city'].map(s))
df2.insert(0, 'rank', range(1, len(df2) + 1))
df2

Unnamed: 0,rank,city,temp avg,Broken Clouds days,Clear days,Cloudy days,Cool days,Dense Fog days,Drizzle Broken Clouds days,Drizzle Dense Fog days,Drizzle Fog days,Drizzle More Clouds Than Sun days,Drizzle Mostly Cloudy days,Drizzle Overcast days,Drizzle Partly Sunny days,Duststorm days,Extremely Hot days,Fog days,Hail Cloudy days,Hail Partly Sunny days,Hail Passing Clouds days,Haze days,Heavy Rain More Clouds Than Sun days,Heavy Rain Mostly Cloudy days,Heavy Rain Overcast days,Heavy Rain Partly Sunny days,Hot days,Light Rain Broken Clouds days,Light Rain Fog days,Light Rain More Clouds Than Sun days,Light Rain Mostly Cloudy days,Light Rain Overcast days,Light Rain Partly Cloudy days,Light Rain Partly Sunny days,Light Rain Passing Clouds days,Light Rain Scattered Clouds days,Low Level Haze days,Mild days,More Clouds Than Sun days,Mostly Cloudy days,...,Pleasantly Warm days,Rain Broken Clouds days,Rain Clear days,Rain Fog days,Rain More Clouds Than Sun days,Rain Mostly Cloudy days,Rain Overcast days,Rain Partly Cloudy days,Rain Partly Sunny days,Rain Passing Clouds days,Rain Sandstorm days,Rain Scattered Clouds days,Rain Showers Partly Sunny days,Refreshingly Cool days,Sandstorm days,Scattered Clouds days,Smoke days,Sprinkles Cloudy days,Sprinkles Duststorm days,Sprinkles Low Level Haze days,Sprinkles Overcast days,Strong Thunderstorms Cloudy days,Strong Thunderstorms More Clouds Than Sun days,Strong Thunderstorms Partly Sunny days,Sunny days,Thundershowers Partly Sunny days,Thundershowers Passing Clouds days,Thundershowers Scattered Clouds days,Thunderstorms Broken Clouds days,Thunderstorms Cloudy days,Thunderstorms Fog days,Thunderstorms More Clouds Than Sun days,Thunderstorms Mostly Cloudy days,Thunderstorms Overcast days,Thunderstorms Partly Cloudy days,Thunderstorms Partly Sunny days,Thunderstorms Passing Clouds days,Thunderstorms Sandstorm days,Thunderstorms Scattered Clouds days,Warm days
0,1,Mecca,29.023239,32,7581,0,0,10,1,0,0,0,0,0,0,7,2,9,0,0,0,9,0,0,0,0,1,2,0,0,1,0,0,1,1,0,27,1,1,0,...,0,1,0,0,0,0,0,0,2,2,0,1,0,0,60,1099,3,0,0,0,0,0,0,0,6524,5,17,3,4,0,0,0,0,1,1,30,81,0,11,4
1,2,Madina,28.70874,94,8668,0,0,1,0,0,0,0,0,0,0,156,5,14,0,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,2,4,15,...,2,3,0,0,0,1,0,4,3,7,0,3,0,0,6,1023,0,0,0,0,0,0,0,0,7527,0,1,0,6,0,0,2,0,0,4,14,42,0,6,0
2,3,Riyadh,27.957981,378,7538,0,0,64,3,0,0,7,8,0,1,714,5,77,0,0,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,281,2,49,43,...,1,1,1,0,7,10,0,26,2,2,0,1,0,0,7,527,0,20,4,1,17,0,0,0,5074,0,2,0,6,0,0,2,32,1,90,5,46,2,1,0
3,4,Eastern Province,27.215077,259,8524,6,2,32,3,0,1,3,8,1,2,102,10,272,1,0,0,194,1,3,1,0,0,10,2,7,19,1,15,4,13,3,194,5,67,65,...,3,4,0,0,4,20,1,8,3,5,0,2,0,0,3,462,0,0,0,0,0,1,1,1,6772,0,0,0,14,1,2,12,39,8,29,15,45,0,9,5
4,5,Qassim,25.96135,4,6950,0,0,9,0,0,0,0,0,3,0,265,2,228,0,0,0,0,0,0,0,2,1,0,0,0,0,9,0,4,0,0,76,8,0,0,...,2,0,0,0,0,0,22,0,23,26,0,0,0,4,0,1004,5,0,0,0,0,0,0,0,6854,0,0,0,2,1,0,0,0,17,1,53,219,0,8,1
5,6,Hail,25.927837,4,7041,0,0,9,0,0,0,0,0,3,0,274,2,224,0,0,0,0,0,0,0,2,1,0,0,0,0,9,0,4,0,0,77,7,0,0,...,2,0,0,0,0,0,21,0,23,31,0,0,0,4,0,1045,5,0,0,0,0,0,0,0,6940,0,0,0,2,1,0,0,0,17,1,53,219,0,6,1
6,7,Northern boarder,24.978799,317,8770,0,2,84,0,3,0,0,0,0,0,149,3,134,0,0,1,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,173,1,0,0,...,5,1,0,0,0,0,0,6,69,106,0,0,0,0,9,515,0,0,0,0,0,0,0,0,8095,0,0,0,2,0,0,0,0,0,3,25,59,0,0,1
7,8,Baha,23.46811,67,7834,0,2,8,0,0,1,0,0,0,3,93,0,290,0,0,0,119,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,1,2,6,...,2,1,0,0,0,3,2,1,10,6,0,2,1,0,0,1677,1,0,0,0,0,0,0,0,5559,0,0,1,18,1,6,0,9,21,11,113,95,0,92,0
8,9,Jawf,23.101268,69,8478,0,0,60,1,0,8,0,1,0,0,98,2,162,0,1,1,8,0,0,0,0,0,0,0,1,0,0,0,1,0,0,75,1,6,5,...,1,6,0,4,10,9,6,5,23,28,8,3,0,0,54,894,0,0,0,0,0,0,0,0,7587,0,0,0,7,0,2,0,0,6,7,45,137,3,15,0
9,10,Tabuk,22.638735,42,6779,0,0,0,1,0,0,0,0,0,2,31,6,4,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,1,5,1,74,3,0,0,...,6,0,0,0,0,0,0,0,17,25,0,1,0,0,63,1069,2,0,0,0,0,0,0,0,7013,0,0,0,0,0,0,0,0,0,0,43,62,0,2,0


## Vesualiztion
---

In [24]:
#Quantitative variables:
quantitative = df[['temp', 'wind', 'humidity', 'barometer', 'visibility']]
print(quantitative)

        temp  wind  humidity  barometer  visibility
0         17    11      64.0     1018.0          16
1         17     6      64.0     1018.0          16
2         15    11      72.0     1019.0          16
3         15    11      72.0     1019.0          16
4         15     9      72.0     1019.0          16
5         13    13      82.0     1019.0          16
6         12     7      88.0     1019.0          16
7         14     9      72.0     1021.0          16
8         15     9      72.0     1021.0           7
9         17     7      64.0     1021.0           9
10        19    19      64.0     1021.0           7
11        20    19      60.0     1020.0          16
12        21    15      57.0     1020.0          16
13        22    15      53.0     1019.0          16
14        23    15      50.0     1018.0          16
15        22    22      53.0     1018.0          16
16        20    20      46.0     1018.0          16
17        19    19      49.0     1019.0          16
18        17

In [25]:
rcParams['figure.figsize'] = 9, 9
quantitative.hist();

In [26]:
# check this code 

In [27]:
#Drawing a heatmap
def facet_heatmap(df, color, **kws):
    values=df.columns.values[3]
    df = df.pivot(index='day', columns='hour', values=values)
    sns.heatmap(df, cmap='coolwarm', **kws)  

#Joining heatmaps of every month in a year 
def weather_calendar(year,weather): #Year= Any year in DataFrame. Weather=Any quantitative variable
    datayear = df[df['year']==year][['month', 'day', 'hour', weather]]
    vmin=datayear[weather].min()
    vmax=datayear[weather].max()
    with sns.plotting_context(font_scale=12):
        g = sns.FacetGrid(datayear,col="month", col_wrap=3) #One heatmap per month
        g = g.map_dataframe(facet_heatmap,vmin=vmin, vmax=vmax)
        g.set_axis_labels('Hour', 'Day')
        plt.subplots_adjust(top=0.9)
        g.fig.suptitle('%s Calendar. Year: %s.' %(weather, year), fontsize=18)

In [28]:
# check this code 

In [29]:
weather_calendar(2017,'temp')

TypeError: ignored

In [None]:
# check this code 

In [None]:
year_humi = df.groupby(df.year).mean()
pd.ewm(year_humi.humidity, 5).plot()
year_humi.humidity.plot(linewidth=1)
plt.title('Saudi Average Humidity by year')
plt.xlabel('year')

## Target
---

In [30]:
df.weather.unique()

array(['Clear', 'Sunny', 'Scattered Clouds', 'Partly Sunny',
       'Passing Clouds', 'Refreshingly Cool', 'Low Level Haze',
       'Duststorm', 'Thunderstorms Passing Clouds', 'Fog',
       'Thunderstorms Partly Sunny', 'Light Rain Partly Sunny',
       'Dense Fog', 'Thunderstorms Scattered Clouds',
       'Rain Passing Clouds', 'Extremely Hot', 'Rain Partly Sunny',
       'Pleasantly Warm', 'Hot', 'Mild', 'Overcast', 'Rain Overcast',
       'Smoke', 'Thunderstorms Broken Clouds', 'Heavy Rain Partly Sunny',
       'Thunderstorms Overcast', 'Light Rain Overcast', 'Warm',
       'Thunderstorms Cloudy', 'Drizzle Overcast',
       'Thunderstorms Partly Cloudy', 'Broken Clouds', 'Sandstorm',
       'Partly Cloudy', 'Mostly Cloudy', 'Rain Partly Cloudy',
       'Rain Broken Clouds', 'Rain Scattered Clouds', 'Haze',
       'Rain Mostly Cloudy', 'Hail Partly Sunny',
       'Thundershowers Passing Clouds',
       'Thunderstorms More Clouds Than Sun', 'More Clouds Than Sun',
       'Light Rain 

In [31]:
df.weather.nunique()

81

In [32]:
plt.figure(figsize=(7,5))
df['weather'].value_counts().head(15).plot(kind='barh', color= 'g')
plt.title('15 most common weathers in Saudi Arabia');
plt.grid(axis= 'y');

Clear and Sunny are most common weatehrs conditions in Saudi Arabia.

In [33]:
plt.figure(figsize=(7, 5))
sns.distplot(df['temp'],bins=[i for i in range(0,61,5)], kde=False)
plt.title("Distribution of Temperatures")
plt.grid();

Most common temperature scale in Saudi Arabia is from 20 to 25 degree.

In [34]:
df.wind.plot(kind = 'line', color = "blue", label = "wind", figsize = (10,7),
                        linewidth = 1, alpha = 0.5, grid = True, linestyle = '-')

df.temp.plot(kind = 'line', color = "blue", label = "temp", figsize = (10,7),
                            linewidth = 1, alpha = 1, grid = True, linestyle = 'dashed')
plt.legend()
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.title("Line Plot")
plt.show()

In [35]:
profile = pandas_profiling.ProfileReport(df)
profile

0,1
Number of variables,14
Number of observations,249023
Total Missing (%),0.0%
Total size in memory,26.6 MiB
Average record size in memory,112.0 B

0,1
Numeric,10
Categorical,3
Boolean,0
Date,1
Text (Unique),0
Rejected,0
Unsupported,0

0,1
Distinct count,56
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1015.5
Minimum,904
Maximum,1101
Zeros (%),0.0%

0,1
Minimum,904
5-th percentile,1003
Q1,1011
Median,1016
Q3,1021
95-th percentile,1025
Maximum,1101
Range,197
Interquartile range,10

0,1
Standard deviation,6.9704
Coef of variation,0.0068643
Kurtosis,0.07855
Mean,1015.5
MAD,5.7568
Skewness,-0.40641
Sum,252870000
Variance,48.586
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
1022.0,12779,5.1%,
1016.0,12637,5.1%,
1023.0,12612,5.1%,
1015.0,12592,5.1%,
1021.0,12511,5.0%,
1014.0,12475,5.0%,
1013.0,12298,4.9%,
1024.0,12029,4.8%,
1012.0,11585,4.7%,
1017.0,11309,4.5%,

Value,Count,Frequency (%),Unnamed: 3
904.0,1,0.0%,
990.0,2,0.0%,
991.0,29,0.0%,
992.0,71,0.0%,
993.0,224,0.1%,

Value,Count,Frequency (%),Unnamed: 3
1030.0,138,0.1%,
1031.0,44,0.0%,
1032.0,2,0.0%,
1053.0,1,0.0%,
1101.0,2,0.0%,

0,1
Distinct count,13
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Jawf,20352
Mecca,20268
Tabuk,20240
Other values (10),188163

Value,Count,Frequency (%),Unnamed: 3
Jawf,20352,8.2%,
Mecca,20268,8.1%,
Tabuk,20240,8.1%,
Northern boarder,20235,8.1%,
Hail,20121,8.1%,
Madina,19965,8.0%,
Baha,19959,8.0%,
Najran,19847,8.0%,
Jazan,19829,8.0%,
Qassim,19793,7.9%,

0,1
Distinct count,850
Unique (%),0.3%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Minimum,2017-01-01 00:00:00
Maximum,2019-04-30 00:00:00

0,1
Distinct count,31
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,15.691
Minimum,1
Maximum,31
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,2
Q1,8
Median,16
Q3,23
95-th percentile,29
Maximum,31
Range,30
Interquartile range,15

0,1
Standard deviation,8.788
Coef of variation,0.56006
Kurtosis,-1.1922
Mean,15.691
MAD,7.6076
Skewness,0.0096391
Sum,3907440
Variance,77.228
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
1,8255,3.3%,
27,8244,3.3%,
20,8244,3.3%,
22,8243,3.3%,
5,8238,3.3%,
7,8234,3.3%,
8,8233,3.3%,
6,8221,3.3%,
13,8220,3.3%,
4,8219,3.3%,

Value,Count,Frequency (%),Unnamed: 3
1,8255,3.3%,
2,8211,3.3%,
3,8146,3.3%,
4,8219,3.3%,
5,8238,3.3%,

Value,Count,Frequency (%),Unnamed: 3
27,8244,3.3%,
28,8147,3.3%,
29,7334,2.9%,
30,7390,3.0%,
31,4707,1.9%,

0,1
Distinct count,24
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,12.537
Minimum,1
Maximum,24
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,2
Q1,7
Median,13
Q3,19
95-th percentile,23
Maximum,24
Range,23
Interquartile range,12

0,1
Standard deviation,6.9103
Coef of variation,0.55119
Kurtosis,-1.1978
Mean,12.537
MAD,5.9849
Skewness,-0.0045631
Sum,3121974
Variance,47.752
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
7,10479,4.2%,
8,10478,4.2%,
13,10471,4.2%,
17,10457,4.2%,
24,10454,4.2%,
23,10454,4.2%,
10,10439,4.2%,
22,10437,4.2%,
21,10433,4.2%,
11,10429,4.2%,

Value,Count,Frequency (%),Unnamed: 3
1,10363,4.2%,
2,10229,4.1%,
3,10033,4.0%,
4,10095,4.1%,
5,10265,4.1%,

Value,Count,Frequency (%),Unnamed: 3
20,10356,4.2%,
21,10433,4.2%,
22,10437,4.2%,
23,10454,4.2%,
24,10454,4.2%,

0,1
Distinct count,98
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,37.553
Minimum,0
Maximum,100
Zeros (%),0.0%

0,1
Minimum,0
5-th percentile,8
Q1,17
Median,32
Q3,55
95-th percentile,82
Maximum,100
Range,100
Interquartile range,38

0,1
Standard deviation,23.592
Coef of variation,0.62824
Kurtosis,-0.61365
Mean,37.553
MAD,19.953
Skewness,0.60695
Sum,9351600
Variance,556.6
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
13.0,6903,2.8%,
15.0,5878,2.4%,
11.0,5858,2.4%,
12.0,5466,2.2%,
14.0,5370,2.2%,
10.0,5261,2.1%,
18.0,5183,2.1%,
20.0,5110,2.1%,
16.0,5054,2.0%,
17.0,4869,2.0%,

Value,Count,Frequency (%),Unnamed: 3
0.0,4,0.0%,
1.0,159,0.1%,
2.0,460,0.2%,
3.0,815,0.3%,
4.0,1035,0.4%,

Value,Count,Frequency (%),Unnamed: 3
89.0,138,0.1%,
90.0,1,0.0%,
93.0,80,0.0%,
94.0,2151,0.9%,
100.0,1850,0.7%,

0,1
Distinct count,59
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.13111
Minimum,0
Maximum,59
Zeros (%),99.5%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,0
Maximum,59
Range,59
Interquartile range,0

0,1
Standard deviation,1.9707
Coef of variation,15.031
Kurtosis,310.59
Mean,0.13111
MAD,0.26079
Skewness,16.923
Sum,32649
Variance,3.8837
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
0,247670,99.5%,
30,112,0.0%,
25,50,0.0%,
15,48,0.0%,
23,47,0.0%,
16,45,0.0%,
18,43,0.0%,
24,38,0.0%,
31,37,0.0%,
34,35,0.0%,

Value,Count,Frequency (%),Unnamed: 3
0,247670,99.5%,
1,9,0.0%,
2,14,0.0%,
3,13,0.0%,
4,16,0.0%,

Value,Count,Frequency (%),Unnamed: 3
55,3,0.0%,
56,2,0.0%,
57,1,0.0%,
58,2,0.0%,
59,1,0.0%,

0,1
Distinct count,12
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,6.0507
Minimum,1
Maximum,12
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,3
Median,6
Q3,9
95-th percentile,12
Maximum,12
Range,11
Interquartile range,6

0,1
Standard deviation,3.5216
Coef of variation,0.58201
Kurtosis,-1.251
Mean,6.0507
MAD,3.0792
Skewness,0.18491
Sum,1506762
Variance,12.402
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
3,26925,10.8%,
1,26074,10.5%,
4,25910,10.4%,
2,23817,9.6%,
10,19093,7.7%,
12,18705,7.5%,
11,18503,7.4%,
7,18334,7.4%,
8,18279,7.3%,
5,18147,7.3%,

Value,Count,Frequency (%),Unnamed: 3
1,26074,10.5%,
2,23817,9.6%,
3,26925,10.8%,
4,25910,10.4%,
5,18147,7.3%,

Value,Count,Frequency (%),Unnamed: 3
8,18279,7.3%,
9,17665,7.1%,
10,19093,7.7%,
11,18503,7.4%,
12,18705,7.5%,

0,1
Distinct count,55
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,24.723
Minimum,-4
Maximum,50
Zeros (%),0.0%

0,1
Minimum,-4
5-th percentile,11
Q1,18
Median,24
Q3,31
95-th percentile,40
Maximum,50
Range,54
Interquartile range,13

0,1
Standard deviation,8.8809
Coef of variation,0.35922
Kurtosis,-0.55428
Mean,24.723
MAD,7.3093
Skewness,0.13561
Sum,6156502
Variance,78.871
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
23,10432,4.2%,
22,10238,4.1%,
20,10013,4.0%,
24,9952,4.0%,
21,9939,4.0%,
25,9835,3.9%,
19,9628,3.9%,
26,9552,3.8%,
27,9420,3.8%,
18,9154,3.7%,

Value,Count,Frequency (%),Unnamed: 3
-4,1,0.0%,
-3,7,0.0%,
-2,7,0.0%,
-1,38,0.0%,
0,35,0.0%,

Value,Count,Frequency (%),Unnamed: 3
46,603,0.2%,
47,190,0.1%,
48,33,0.0%,
49,13,0.0%,
50,2,0.0%,

0,1
Distinct count,710
Unique (%),0.3%
Missing (%),0.0%
Missing (n),0

0,1
07:00,10415
13:00,10412
08:00,10408
Other values (707),217788

Value,Count,Frequency (%),Unnamed: 3
07:00,10415,4.2%,
13:00,10412,4.2%,
08:00,10408,4.2%,
17:00,10391,4.2%,
00:00,10391,4.2%,
23:00,10389,4.2%,
12:00,10378,4.2%,
11:00,10376,4.2%,
15:00,10373,4.2%,
09:00,10371,4.2%,

0,1
Distinct count,15
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,11.053
Minimum,-1
Maximum,161
Zeros (%),0.2%

0,1
Minimum,-1
5-th percentile,-1
Q1,5
Median,16
Q3,16
95-th percentile,16
Maximum,161
Range,162
Interquartile range,11

0,1
Standard deviation,7.053
Coef of variation,0.63808
Kurtosis,0.60704
Mean,11.053
MAD,6.3809
Skewness,-0.79262
Sum,2752564
Variance,49.745
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
16,160541,64.5%,
-1,49644,19.9%,
8,9679,3.9%,
7,6621,2.7%,
6,4479,1.8%,
5,4358,1.8%,
9,3877,1.6%,
4,2877,1.2%,
3,2654,1.1%,
2,2420,1.0%,

Value,Count,Frequency (%),Unnamed: 3
-1,49644,19.9%,
0,405,0.2%,
1,1462,0.6%,
2,2420,1.0%,
3,2654,1.1%,

Value,Count,Frequency (%),Unnamed: 3
9,3877,1.6%,
16,160541,64.5%,
29,3,0.0%,
35,1,0.0%,
161,2,0.0%,

0,1
Distinct count,81
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Clear,98827
Sunny,82194
Passing Clouds,34380
Other values (78),33622

Value,Count,Frequency (%),Unnamed: 3
Clear,98827,39.7%,
Sunny,82194,33.0%,
Passing Clouds,34380,13.8%,
Scattered Clouds,15304,6.1%,
Partly Sunny,6925,2.8%,
Duststorm,1893,0.8%,
Fog,1501,0.6%,
Broken Clouds,1272,0.5%,
Thunderstorms Passing Clouds,1232,0.5%,
Low Level Haze,1046,0.4%,

0,1
Distinct count,44
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,12.957
Minimum,-1
Maximum,163
Zeros (%),10.7%

0,1
Minimum,-1
5-th percentile,0
Q1,7
Median,11
Q3,19
95-th percentile,30
Maximum,163
Range,164
Interquartile range,12

0,1
Standard deviation,8.7116
Coef of variation,0.67234
Kurtosis,1.1782
Mean,12.957
MAD,6.9397
Skewness,0.77252
Sum,3226617
Variance,75.892
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
7,31173,12.5%,
0,26711,10.7%,
11,25646,10.3%,
9,22989,9.2%,
15,22684,9.1%,
19,20276,8.1%,
6,16513,6.6%,
13,15182,6.1%,
22,13394,5.4%,
17,10024,4.0%,

Value,Count,Frequency (%),Unnamed: 3
-1,109,0.0%,
0,26711,10.7%,
2,402,0.2%,
4,7849,3.2%,
6,16513,6.6%,

Value,Count,Frequency (%),Unnamed: 3
76,1,0.0%,
80,1,0.0%,
93,2,0.0%,
115,1,0.0%,
163,1,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2017.7
Minimum,2017
Maximum,2019
Zeros (%),0.0%

0,1
Minimum,2017
5-th percentile,2017
Q1,2017
Median,2018
Q3,2018
95-th percentile,2019
Maximum,2019
Range,2
Interquartile range,1

0,1
Standard deviation,0.70611
Coef of variation,0.00034996
Kurtosis,-0.91186
Mean,2017.7
MAD,0.61961
Skewness,0.47765
Sum,502456199
Variance,0.4986
Memory size,1.9 MiB

Value,Count,Frequency (%),Unnamed: 3
2017,108659,43.6%,
2018,103920,41.7%,
2019,36444,14.6%,

Value,Count,Frequency (%),Unnamed: 3
2017,108659,43.6%,
2018,103920,41.7%,
2019,36444,14.6%,

Value,Count,Frequency (%),Unnamed: 3
2017,108659,43.6%,
2018,103920,41.7%,
2019,36444,14.6%,

Unnamed: 0,city,date,time,year,month,day,hour,minute,weather,temp,wind,humidity,barometer,visibility
0,Qassim,2017-01-01,00:00,2017,1,1,24,0,Clear,17,11,64.0,1018.0,16
1,Qassim,2017-01-01,01:00,2017,1,1,1,0,Clear,17,6,64.0,1018.0,16
2,Qassim,2017-01-01,03:00,2017,1,1,3,0,Clear,15,11,72.0,1019.0,16
3,Qassim,2017-01-01,04:00,2017,1,1,4,0,Clear,15,11,72.0,1019.0,16
4,Qassim,2017-01-01,05:00,2017,1,1,5,0,Clear,15,9,72.0,1019.0,16


## Time Series Analysis

---


In [36]:
#Helpful method to plot series
def plot_series(time, series, format="-", start=0, end=None):
    plt.plot(time[start:end], series[start:end], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)

In [37]:
series = np.array(df['temp'])
time = np.array(df.index)
plt.figure(figsize=(10, 3))
plt.ylabel('temp')
plot_series(time, series)

## Check for missing days for each City
To choose city to work on.
---

In [38]:
df.city.unique()

array(['Qassim', 'Hail', 'Madina', 'Eastern Province', 'Riyadh', 'Mecca',
       'Tabuk', 'Assir', 'Northern boarder', 'Jazan', 'Najran', 'Baha',
       'Jawf'], dtype=object)

#### Mecca

In [39]:
df_mkh = df.loc[(df['city'] == 'Mecca')]

In [40]:
df_mkh.date.nsmallest()

94805   2017-01-01
94806   2017-01-01
94807   2017-01-01
94808   2017-01-01
94809   2017-01-01
Name: date, dtype: datetime64[ns]

In [41]:
df_mkh.date.nlargest()

115049   2019-04-30
115050   2019-04-30
115051   2019-04-30
115052   2019-04-30
115053   2019-04-30
Name: date, dtype: datetime64[ns]

In [42]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [43]:
print(my_range.difference(df_mkh['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Riyadh

In [44]:
df_ruh = df.loc[(df['city'] == 'Riyadh')]

In [45]:
df_ruh.date.nsmallest()

78384   2017-03-23
78385   2017-03-23
78386   2017-03-23
78387   2017-03-24
78388   2017-03-24
Name: date, dtype: datetime64[ns]

In [46]:
df_ruh.date.nlargest()

94781   2019-04-30
94782   2019-04-30
94783   2019-04-30
94784   2019-04-30
94785   2019-04-30
Name: date, dtype: datetime64[ns]

In [47]:
my_range = pd.date_range(
  start='2017-03-23', end='2019-04-30', freq='D')

In [48]:
print(my_range.difference(df_ruh['date']))

DatetimeIndex(['2017-03-25', '2017-03-26', '2017-03-27', '2017-03-28',
               '2017-03-29', '2017-03-30', '2017-03-31', '2017-04-01',
               '2017-04-02', '2017-04-03', '2017-04-04', '2017-04-05',
               '2017-04-06', '2017-04-07', '2017-04-08', '2017-04-09',
               '2017-04-10', '2017-04-11', '2017-04-12', '2017-04-13',
               '2017-04-14', '2017-04-15', '2017-04-16', '2017-04-17',
               '2017-04-18', '2018-01-01', '2018-01-02', '2018-01-03',
               '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07',
               '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
               '2018-01-12', '2018-01-13', '2018-01-14', '2018-01-15',
               '2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19',
               '2018-01-20', '2018-01-21', '2018-01-22', '2018-01-23',
               '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-27',
               '2018-01-28', '2018-01-29', '2018-01-30', '2018-01-31',
      

#### Qassim

In [49]:
df_qas = df.loc[(df['city'] == 'Qassim')]

In [50]:
df_qas.date.nsmallest()

0   2017-01-01
1   2017-01-01
2   2017-01-01
3   2017-01-01
4   2017-01-01
Name: date, dtype: datetime64[ns]

In [51]:
df_qas.date.nlargest()

19786   2019-04-17
19787   2019-04-17
19788   2019-04-17
19789   2019-04-17
19790   2019-04-17
Name: date, dtype: datetime64[ns]

In [52]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-17', freq='D')

In [53]:
print(my_range.difference(df_qas['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Hail

In [54]:
df_hail = df.loc[(df['city'] == 'Hail')]

In [55]:
df_hail.date.nsmallest()

19793   2017-01-01
19794   2017-01-01
19795   2017-01-01
19796   2017-01-01
19797   2017-01-01
Name: date, dtype: datetime64[ns]

In [56]:
df_hail.date.nlargest()

39890   2019-04-30
39891   2019-04-30
39892   2019-04-30
39893   2019-04-30
39894   2019-04-30
Name: date, dtype: datetime64[ns]

In [57]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [58]:
print(my_range.difference(df_hail['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Madina

In [59]:
df_mad = df.loc[(df['city'] == 'Madina')]

In [60]:
df_mad.date.nsmallest()

39914   2017-01-01
39915   2017-01-01
39916   2017-01-01
39917   2017-01-01
39918   2017-01-01
Name: date, dtype: datetime64[ns]

In [61]:
df_mad.date.nlargest()

59855   2019-04-30
59856   2019-04-30
59857   2019-04-30
59858   2019-04-30
59859   2019-04-30
Name: date, dtype: datetime64[ns]

In [62]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [63]:
print(my_range.difference(df_mad['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Eastern Province

In [64]:
df_ep = df.loc[(df['city'] == 'Eastern Province')]

In [65]:
df_ep.date.nsmallest()

59879   2017-01-01
59880   2017-01-01
59881   2017-01-01
59882   2017-01-01
59883   2017-01-01
Name: date, dtype: datetime64[ns]

In [66]:
df_ep.date.nlargest()

78364   2019-04-30
78365   2019-04-30
78366   2019-04-30
78367   2019-04-30
78368   2019-04-30
Name: date, dtype: datetime64[ns]

In [67]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [68]:
print(my_range.difference(df_ep['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Tabuk

In [69]:
df_tab = df.loc[(df['city'] == 'Tabuk')]

In [70]:
df_tab.date.nsmallest()

115073   2017-01-01
115074   2017-01-01
115075   2017-01-01
115076   2017-01-01
115077   2017-01-01
Name: date, dtype: datetime64[ns]

In [71]:
df_tab.date.nlargest()

135289   2019-04-30
135290   2019-04-30
135291   2019-04-30
135292   2019-04-30
135293   2019-04-30
Name: date, dtype: datetime64[ns]

In [72]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [73]:
print(my_range.difference(df_tab['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Assir

In [74]:
df_asir = df.loc[(df['city'] == 'Assir')]

In [75]:
df_asir.date.nsmallest()

135313   2017-01-01
135314   2017-01-01
135315   2017-01-01
135316   2017-01-01
135317   2017-01-01
Name: date, dtype: datetime64[ns]

In [76]:
df_asir.date.nlargest()

148777   2019-04-30
148778   2019-04-30
148779   2019-04-30
148780   2019-04-30
148781   2019-04-30
Name: date, dtype: datetime64[ns]

In [77]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [78]:
print(my_range.difference(df_asir['date']))

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2018-09-21', '2018-09-22', '2018-09-23', '2018-09-24',
               '2018-09-25', '2018-09-26', '2018-09-27', '2018-09-28',
               '2018-09-29', '2018-09-30'],
              dtype='datetime64[ns]', length=273, freq=None)


#### Northern boarder

In [79]:
df_nb = df.loc[(df['city'] == 'Northern boarder')]

In [80]:
df_nb.date.nsmallest()

148801   2017-01-01
148802   2017-01-01
148803   2017-01-01
148804   2017-01-01
148805   2017-01-01
Name: date, dtype: datetime64[ns]

In [81]:
df_nb.date.nlargest()

169013   2019-04-30
169014   2019-04-30
169015   2019-04-30
169016   2019-04-30
169017   2019-04-30
Name: date, dtype: datetime64[ns]

In [82]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [83]:
print(my_range.difference(df_nb['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Jazan

In [84]:
df_jaz = df.loc[(df['city'] == 'Jazan')]

In [85]:
df_jaz.date.nsmallest()

169036   2017-01-01
169037   2017-01-01
169038   2017-01-01
169039   2017-01-01
169040   2017-01-01
Name: date, dtype: datetime64[ns]

In [86]:
df_jaz.date.nlargest()

188841   2019-04-30
188842   2019-04-30
188843   2019-04-30
188844   2019-04-30
188845   2019-04-30
Name: date, dtype: datetime64[ns]

In [87]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [88]:
print(my_range.difference(df_jaz['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Najran

In [89]:
df_naj = df.loc[(df['city'] == 'Najran')]

In [90]:
df_naj.date.nsmallest()

188865   2017-01-01
188866   2017-01-01
188867   2017-01-01
188868   2017-01-01
188869   2017-01-01
Name: date, dtype: datetime64[ns]

In [91]:
df_naj.date.nlargest()

208689   2019-04-30
208690   2019-04-30
208691   2019-04-30
208692   2019-04-30
208693   2019-04-30
Name: date, dtype: datetime64[ns]

In [92]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [93]:
print(my_range.difference(df_naj['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Baha

In [94]:
df_baha = df.loc[(df['city'] == 'Baha')]

In [95]:
df_baha.date.nsmallest()

208712   2017-01-01
208713   2017-01-01
208714   2017-01-01
208715   2017-01-01
208716   2017-01-01
Name: date, dtype: datetime64[ns]

In [96]:
df_baha.date.nlargest()

228647   2019-04-30
228648   2019-04-30
228649   2019-04-30
228650   2019-04-30
228651   2019-04-30
Name: date, dtype: datetime64[ns]

In [97]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [98]:
print(my_range.difference(df_baha['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Jawf

In [99]:
df_jwf = df.loc[(df['city'] == 'Jawf')]

In [100]:
df_jwf.date.nsmallest()

228671   2017-01-01
228672   2017-01-01
228673   2017-01-01
228674   2017-01-01
228675   2017-01-01
Name: date, dtype: datetime64[ns]

In [101]:
df_jwf.date.nlargest()

248999   2019-04-30
249000   2019-04-30
249001   2019-04-30
249002   2019-04-30
249003   2019-04-30
Name: date, dtype: datetime64[ns]

In [102]:
my_range = pd.date_range(
  start='2017-01-01', end='2019-04-30', freq='D')

In [103]:
print(my_range.difference(df_jwf['date']))

DatetimeIndex([], dtype='datetime64[ns]', freq=None)


#### Riyadh & Assir have missing dates

we will try fix it later for future work

---

### Choosing Mecca data to be our data on this project. 🕋

In [104]:
df_mkh.to_csv('mkhdata')