In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

### Constants

In [24]:
# Relative path to the raw Istanbul weather CSV that this notebook loads.
DATA_PATH = "../data/raw/istanbul_weather.csv"

### Loading the data

In [25]:
def load_data(path: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The parsed contents of the file, as returned by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing regular
            file (it is missing, or it is a directory).
    """
    # Use isfile rather than exists: a directory path "exists" but cannot be
    # read as CSV, and would otherwise surface as an OS-specific error from
    # read_csv instead of the clear message below.
    if not os.path.isfile(path):
        raise FileNotFoundError(f"File not found at {path}")

    return pd.read_csv(path)

In [26]:
# Read the raw CSV into a DataFrame and preview its first rows.
df = load_data(DATA_PATH)
df.head()

Unnamed: 0,time,temperature_2m,relative_humidity_2m,dew_point_2m,surface_pressure,precipitation,weather_code,cloud_cover,shortwave_radiation,wind_speed_10m,wind_direction_10m,soil_temperature_0_to_7cm
0,2006-01-01T00:00,5.1,93,4.0,1018.7,0.0,0,5,0.0,9.0,143,5.9
1,2006-01-01T01:00,4.7,93,3.7,1018.6,0.0,0,5,0.0,9.2,141,5.4
2,2006-01-01T02:00,5.0,92,3.8,1018.4,0.0,0,15,0.0,9.3,144,5.0
3,2006-01-01T03:00,5.6,91,4.3,1018.5,0.0,0,19,0.0,9.5,151,4.6
4,2006-01-01T04:00,6.1,90,4.7,1018.5,0.0,0,9,0.0,9.4,157,4.3


### Check for NaN values in each column

In [27]:
def check_missing_data(df):
    """Summarize the columns of ``df`` that contain missing values.

    Prints the total number of columns and the number of columns that have
    at least one missing value, then returns a per-column summary.

    Args:
        df: DataFrame to inspect.

    Returns:
        A DataFrame indexed by column name, restricted to columns with at
        least one missing value and sorted by missing percentage (largest
        first). Its columns are 'Eksik Değerler' (missing count),
        '% Değeri' (missing percentage, rounded to 2 decimals) and
        'Veri Tipi' (column dtype). Empty when nothing is missing.
    """
    n_rows = len(df)
    missing_count = df.isnull().sum()

    if n_rows:
        missing_percent = 100 * missing_count / n_rows
    else:
        # A zero-row frame has no missing cells; avoid 0/0 -> NaN, which
        # would otherwise make every column look like it has missing data.
        missing_percent = missing_count.astype(float)

    summary = pd.concat([missing_count, missing_percent, df.dtypes], axis=1)
    summary.columns = ['Eksik Değerler', '% Değeri', 'Veri Tipi']

    # Filter on the count rather than the percentage so the check stays
    # exact and is unaffected by NaN percentages.
    has_missing = summary['Eksik Değerler'] != 0
    summary = (
        summary[has_missing]
        .sort_values('% Değeri', ascending=False)
        .round(2)
    )

    print(f"Total feature: {len(df.columns)}")
    print(f"Missing feature: {summary.shape[0]}")

    return summary

In [28]:
# List the columns of the loaded frame that contain missing values.
check_missing_data(df)

Total feature: 12
Missing feature: 0


Unnamed: 0,Eksik Değerler,% Değeri,Veri Tipi


### Dataset information

In [30]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

Unnamed: 0,temperature_2m,relative_humidity_2m,dew_point_2m,surface_pressure,precipitation,weather_code,cloud_cover,shortwave_radiation,wind_speed_10m,wind_direction_10m,soil_temperature_0_to_7cm
count,175800.0,175800.0,175800.0,175800.0,175800.0,175800.0,175800.0,175800.0,175800.0,175800.0,175800.0
mean,15.209908,75.647253,10.589929,1011.352464,0.079688,8.78273,47.47773,177.168652,14.621527,118.336695,16.245369
std,7.540353,14.135567,6.476791,6.537146,0.348814,18.744088,39.9218,254.642998,7.597925,101.962436,8.632981
min,-6.9,13.0,-11.7,975.7,0.0,0.0,0.0,0.0,0.0,0.0,-3.3
25%,9.1,66.0,5.8,1006.9,0.0,0.0,5.0,0.0,8.7,38.0,9.2
50%,14.9,77.0,10.6,1010.8,0.0,1.0,42.0,8.0,13.5,60.0,15.2
75%,21.5,87.0,16.0,1015.4,0.0,3.0,94.0,310.0,19.5,208.0,22.5
max,39.6,100.0,25.5,1035.5,16.4,75.0,100.0,971.0,55.7,360.0,44.4
