## Importing Required Libraries.

In [27]:
import os

import numpy as np
import pandas as pd
from pymongo import MongoClient


## Loading the Data from MongoDB.

In [28]:
# Connection string for the MongoDB server. It can be overridden through the
# MONGO_URI environment variable so credentials need not live in the notebook.
# NOTE(review): the fallback below still embeds a username/password in source;
# drop the default and rotate that password once MONGO_URI is set everywhere.
mongo_uri = os.environ.get('MONGO_URI', 'mongodb://sahil:101202@0.0.0.0:27017/')

# The context manager closes the client as soon as the documents have been
# read, instead of leaving the connection open for the kernel's lifetime.
with MongoClient(mongo_uri) as client:
    db = client['netherlands']   # Selecting the database with the Netherlands data.
    collection = db['hourly']    # Selecting the hourly observations.
    # Station 370: per the original notebook, the Dutch city "Eindhoven".
    cursor = collection.find({'station_code': 370})
    # Materialise the cursor into a DataFrame while the connection is still open.
    data = pd.DataFrame(list(cursor))

# Fail here with a clear message rather than with a KeyError in a later cell.
if data.empty:
    raise ValueError("No documents found for station_code 370 in netherlands.hourly")

data.head()

Unnamed: 0,_id,station_code,date,hour,DD,FH,FF,FX,T,T10N,...,WW,IX,M,R,S,O,Y,YEAR,MONTH,DAY
0,65e99611d4b2173e044170fa,370,2014-02-01T00:00:00.000Z,1,170.0,70.0,70.0,130.0,65.0,,...,57.0,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1
1,65e99611d4b2173e044170fb,370,2014-02-01T00:00:00.000Z,2,170.0,70.0,70.0,120.0,64.0,,...,61.0,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1
2,65e99611d4b2173e044170fc,370,2014-02-01T00:00:00.000Z,3,170.0,70.0,70.0,110.0,66.0,,...,51.0,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1
3,65e99611d4b2173e044170fd,370,2014-02-01T00:00:00.000Z,4,180.0,60.0,70.0,110.0,71.0,,...,22.0,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1
4,65e99611d4b2173e044170fe,370,2014-02-01T00:00:00.000Z,5,180.0,80.0,80.0,140.0,70.0,,...,81.0,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1


#### Checking for NULL / NaN Values.

In [29]:
# Count the missing (NaN/None) entries in every column.
data.isna().sum(axis=0)

_id                 0
station_code        0
date                0
hour                0
DD                  0
FH                  0
FF                  0
FX                  0
T                   0
T10N            73040
TD                  0
SQ                  0
Q                   0
DR                  0
RH                  0
P                   0
VV                 77
N                  66
U                   0
WW              54017
IX                  0
M                  82
R                  82
S                  82
O                  82
Y                  82
YEAR                0
MONTH               0
DAY                 0
dtype: int64

#### We only need the temperature ("T"), date, and hour here, and none of these features contain null or NaN values.

## Merging "date" and "hour" into a single datetime index for the temperature.

In [30]:
# Parse the 'date' column into pandas datetimes (harmless if it already is one).
data['date'] = pd.to_datetime(data['date'])

# Rows recorded with hour 0 are moved to the following calendar day.
# NOTE: this rewrites 'date' in place, so re-running the cell shifts any
# hour-0 rows by one more day each time.
one_day = pd.Timedelta(days=1)
is_hour_zero = data['hour'] == 0
data.loc[is_hour_zero, 'date'] = data.loc[is_hour_zero, 'date'] + one_day

# Full timestamp = calendar date + hour-of-day offset, stored as 'datetime'.
hour_offset = pd.to_timedelta(data['hour'], unit='h')
data['datetime'] = data['date'] + hour_offset

data.head()

Unnamed: 0,_id,station_code,date,hour,DD,FH,FF,FX,T,T10N,...,IX,M,R,S,O,Y,YEAR,MONTH,DAY,datetime
0,65e99611d4b2173e044170fa,370,2014-02-01 00:00:00+00:00,1,170.0,70.0,70.0,130.0,65.0,,...,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1,2014-02-01 01:00:00+00:00
1,65e99611d4b2173e044170fb,370,2014-02-01 00:00:00+00:00,2,170.0,70.0,70.0,120.0,64.0,,...,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1,2014-02-01 02:00:00+00:00
2,65e99611d4b2173e044170fc,370,2014-02-01 00:00:00+00:00,3,170.0,70.0,70.0,110.0,66.0,,...,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1,2014-02-01 03:00:00+00:00
3,65e99611d4b2173e044170fd,370,2014-02-01 00:00:00+00:00,4,180.0,60.0,70.0,110.0,71.0,,...,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1,2014-02-01 04:00:00+00:00
4,65e99611d4b2173e044170fe,370,2014-02-01 00:00:00+00:00,5,180.0,80.0,80.0,140.0,70.0,,...,7.0,0.0,1.0,0.0,0.0,0.0,2014,2,1,2014-02-01 05:00:00+00:00


### Selecting the datetime index and the forecasting features (T, P, RH, U).

In [31]:
# Keep only the timestamp and the forecasting features. `.assign` returns a new
# frame, so the RH replacement is never written into a slice of the wide frame
# (the slice-then-assign pattern is what pandas' SettingWithCopyWarning guards against).
data = data[['datetime', 'T', 'P', 'RH', 'U']].assign(
    # RH values of -1 are mapped to 0.5; every other value is kept as is.
    # NOTE(review): presumably -1 is a sentinel for a trace amount below the
    # measuring resolution; confirm against the data source's field description.
    RH=lambda frame: frame['RH'].replace(-1, 0.5)
)
data.head()

Unnamed: 0,datetime,T,P,RH,U
0,2014-02-01 01:00:00+00:00,65.0,9969.0,2.0,74.0
1,2014-02-01 02:00:00+00:00,64.0,9952.0,3.0,77.0
2,2014-02-01 03:00:00+00:00,66.0,9944.0,2.0,78.0
3,2014-02-01 04:00:00+00:00,71.0,9933.0,0.5,75.0
4,2014-02-01 05:00:00+00:00,70.0,9925.0,0.5,76.0


### Sorting the dataset chronologically by datetime.

In [32]:
# Put the rows in chronological order (ascending 'datetime').
data = data.sort_values(by='datetime', ascending=True)

### Saving the data as CSV for further study.

In [33]:
# Destination of the cleaned dataset. The CLEANED_DATA_CSV environment variable
# overrides it so the notebook can run on machines without this home directory;
# when the variable is unset, the original absolute path is used.
output_csv = os.environ.get(
    'CLEANED_DATA_CSV',
    '/Users/sahilnakrani/Documents/weather forecast/src/Machine-Learning/Final-Data/Cleaned_Data.csv',
)

# Save DataFrame to CSV; index=False leaves the row index out of the file.
data.to_csv(output_csv, index=False)