In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.nn import softmax

from tensorflow.keras.losses import categorical_crossentropy

In [127]:
# Load the raw weather observations.
df = pd.read_csv("./weather.csv")

# Remove the columns that are not used as model features, then discard
# every row that still contains a missing value.
unused_columns = ["Daily Summary", "Formatted Date", "Loud Cover", "Apparent Temperature (C)"]
df.drop(columns=unused_columns, inplace=True)
df.dropna(inplace=True)

# Number of rows per weather label.
df["Summary"].value_counts()


Partly Cloudy                          31635
Mostly Cloudy                          27914
Overcast                               16516
Clear                                  10763
Foggy                                   7117
Breezy and Overcast                      528
Breezy and Mostly Cloudy                 516
Breezy and Partly Cloudy                 386
Dry and Partly Cloudy                     86
Windy and Partly Cloudy                   67
Light Rain                                63
Breezy                                    54
Windy and Overcast                        45
Humid and Mostly Cloudy                   40
Drizzle                                   39
Breezy and Foggy                          35
Windy and Mostly Cloudy                   35
Dry                                       34
Humid and Partly Cloudy                   17
Dry and Mostly Cloudy                     14
Rain                                      10
Windy                                      8
Humid and 

In [None]:

# Making the labels' frequencies comparable, to avoid bias:
#   * labels with fewer than MIN_CLASS_ROWS rows are removed entirely;
#   * labels with more than MAX_CLASS_ROWS rows are trimmed by dropping their
#     first rows, so the last MAX_CLASS_ROWS rows of the label are kept.
MIN_CLASS_ROWS = 5000
MAX_CLASS_ROWS = 10000

# Collect the row labels first and drop them in one go afterwards, so the
# frame is never mutated while its groupby is still being iterated.
rows_to_drop = []
for summary, s_df in df.groupby("Summary"):
    if len(s_df) < MIN_CLASS_ROWS:
        rows_to_drop.extend(s_df.index)
    elif len(s_df) > MAX_CLASS_ROWS:
        excess = len(s_df) - MAX_CLASS_ROWS
        rows_to_drop.extend(s_df.index[:excess])

df.drop(rows_to_drop, axis=0, inplace=True)
# drop=True discards the old index instead of materialising an "index" column
# that would have to be removed again.
df.reset_index(drop=True, inplace=True)

# Converting categorical data into numeric indicator columns.
# The dtype is pinned to uint8 so the frame stays purely numeric regardless of
# the pandas version (newer releases default to bool indicator columns).
# NOTE(review): assumes "Precip Type" has exactly the two categories rain/snow
# left after dropna -- confirm against the data.
df[["rain", "snow"]] = pd.get_dummies(df["Precip Type"], dtype="uint8")
df.drop(["Precip Type"], axis=1, inplace=True)
df.info()

In [123]:
# z-score normalisation of the continuous feature columns
numeric_cols = [
    "Humidity",
    "Pressure (millibars)",
    "Temperature (C)",
    "Visibility (km)",
    "Wind Bearing (degrees)",
    "Wind Speed (km/h)",
]
x = df[numeric_cols]

# per-column statistics of the un-normalised data; a later cell reuses them
# to scale a hand-written sample
org_mean = x.mean()
org_std = x.std()

df[numeric_cols] = (x - org_mean) / org_std

# split the frame into model inputs and the label column
inputs = df.drop(columns=["Summary"])
targets = df[["Summary"]]
inputs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47117 entries, 0 to 47116
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Temperature (C)         47117 non-null  float64
 1   Humidity                47117 non-null  float64
 2   Wind Speed (km/h)       47117 non-null  float64
 3   Wind Bearing (degrees)  47117 non-null  float64
 4   Visibility (km)         47117 non-null  float64
 5   Pressure (millibars)    47117 non-null  float64
 6   rain                    47117 non-null  uint8  
 7   snow                    47117 non-null  uint8  
dtypes: float64(6), uint8(2)
memory usage: 2.2 MB


In [None]:
# one-hot encode the label column
one_hot_targets = pd.get_dummies(targets)

# distinct label names in sorted order
# (presumably so they line up with the one-hot column order -- verify)
labels = np.sort(targets["Summary"].unique())
labels

In [None]:
# turn the pandas frames into TensorFlow tensors
one_hot_tensor = tf.constant(one_hot_targets)
# the labels are cast to float64 explicitly
targets_tf = tf.cast(one_hot_tensor, dtype=tf.float64)
inputs_tf = tf.constant(inputs)

In [None]:
# initializing the model: one weight matrix and one bias row, both float64
# and drawn from a normal distribution.
# The shapes are derived from the data tensors rather than hard-coded, so the
# parameters keep matching if the number of feature columns or label classes
# changes.
n_features = inputs_tf.shape[1]
n_classes = targets_tf.shape[1]
weights = tf.Variable(tf.random.normal(shape=(n_features, n_classes), dtype=tf.float64))
bias = tf.Variable(tf.random.normal(shape=(1, n_classes), dtype=tf.float64))

In [None]:
# main training loop: full-batch gradient descent on the softmax
# cross-entropy of a linear model
lr = 0.01
epochs = range(500)
for epoch in tqdm(epochs):

    # Only the forward pass has to be recorded on the tape.
    with tf.GradientTape() as tape:

        # making prediction
        preds = tf.add(inputs_tf @ weights, bias)
        prob = softmax(preds)

        # calculating loss (averaged over all samples)
        loss = categorical_crossentropy(targets_tf, prob)
        mean_loss = tf.reduce_sum(loss) / len(targets_tf)

    # print(epoch, mean_loss)

    # taking gradients -- done outside the `with` block so the gradient
    # computation itself is not recorded on the tape
    w_grad, b_grad = tape.gradient(mean_loss, [weights, bias])

    # updating model in place; the same Variable objects are reused across
    # epochs instead of allocating new ones on every step
    weights.assign_sub(lr * w_grad)
    bias.assign_sub(lr * b_grad)

In [None]:
def predict(input_):
    """Print the class probabilities for one sample and return the top label.

    Parameters
    ----------
    input_ : array-like
        Feature values for one sample, either 1-D or a 2-D array with a
        single row. It is coerced to a 2-D float64 array so its dtype
        matches the float64 ``weights``; only the first row is reported.

    Returns
    -------
    The entry of the module-level ``labels`` array whose score is highest.

    Notes
    -----
    Reads the module-level ``weights``, ``bias`` and ``labels``.
    """
    # np.atleast_2d turns a single 1-D row into shape (1, n) and leaves an
    # already 2-D input unchanged.
    row = np.atleast_2d(np.asarray(input_, dtype=np.float64))

    pred = tf.constant(row) @ weights + bias
    prob = softmax(pred)

    for label, p in zip(labels, prob[0].numpy()):
        print(f"{label} : {float(p)*100:.2f}%")

    # tf.argmax returns the index of the largest score (first one on ties).
    return labels[int(tf.argmax(pred[0]))]


predict(inputs.iloc[100]), targets.iloc[100]

In [125]:
# Hand-written sample holding raw (un-normalised) values.
# NOTE(review): the values appear to follow the column order of `inputs`
# (temperature, humidity, wind speed, wind bearing, visibility, pressure,
# rain, snow) -- confirm before feeding it to predict().
input_ = np.array([[30, 0.5, 10, 90, 10, 1000, 1, 0]])

# z-score individual raw values with the statistics saved by the
# normalization cell: (value - mean) / std.
temp = (30 - org_mean["Temperature (C)"]) / org_std["Temperature (C)"]
# The key is "Humidity" (no trailing space), and the centred value must also
# be divided by the standard deviation.
humid = (0.5 - org_mean["Humidity"]) / org_std["Humidity"]
# predict(input_)

2.018313361669085