# Weather forecasting

## Data

### Setting up kaggle API

In [None]:
from google.colab import files 
files.upload() #upload your kaggle.json file

In [2]:
!mkdir ~/.kaggle #create a directory called .kaggle in the root folder
!cp kaggle.json ~/.kaggle/ #copy kaggle.json to this folder
!chmod 600 ~/.kaggle/kaggle.json #add full rights to this copied file

In [3]:
!rm kaggle.json #remove the original one

### Download dataset using kaggle API command

In [4]:
!kaggle datasets download -d vonline9/weather-istanbul-data-20092019 #paste the kaggle API command

Downloading weather-istanbul-data-20092019.zip to /content
  0% 0.00/71.6k [00:00<?, ?B/s]
100% 71.6k/71.6k [00:00<00:00, 26.9MB/s]


In [5]:
!unzip weather-istanbul-data-20092019.zip  #unzip the zip file

Archive:  weather-istanbul-data-20092019.zip
  inflating: Istanbul Weather Data.csv  


In [6]:
!rm weather-istanbul-data-20092019.zip #remove the zip file

### Data loading

In [None]:
!pip install pandas #well known pandas library, used for data processing, wrangling .... by data scientists

In [44]:
import pandas as pd

df = pd.read_csv('/content/Istanbul Weather Data.csv',sep=",")  #read the csv file
df.head() #show the 5 first rows of the dataframe

Unnamed: 0,DateTime,Condition,Rain,MaxTemp,MinTemp,SunRise,SunSet,MoonRise,MoonSet,AvgWind,AvgHumidity,AvgPressure
0,02.09.2019,Partly cloudy,0.0,27,22,06:32:00,19:37:00,9:52:00,21:45:00,23,66,1012
1,01.09.2019,Partly cloudy,0.0,27,22,06:31:00,19:38:00,8:37:00,21:13:00,21,66,1011
2,31.08.2019,Patchy rain possible,0.5,26,22,06:30:00,19:40:00,7:21:00,20:40:00,22,63,1015
3,30.08.2019,Partly cloudy,0.0,27,22,06:29:00,19:42:00,6:4:00,20:5:00,20,64,1016
4,29.08.2019,Partly cloudy,0.0,27,23,06:27:00,19:43:00,4:47:00,19:26:00,24,61,1015


In [45]:
df = df[["Condition","MinTemp","AvgHumidity","AvgPressure"]] #select only the useful columns
df.head()

Unnamed: 0,Condition,MinTemp,AvgHumidity,AvgPressure
0,Partly cloudy,22,66,1012
1,Partly cloudy,22,66,1011
2,Patchy rain possible,22,63,1015
3,Partly cloudy,22,64,1016
4,Partly cloudy,23,61,1015


In [46]:
df.shape #print the dimensions of this dataframe

(3896, 4)

### Data processing

In [47]:
df = df.dropna() #remove empty rows

#Convert each category into an interger
for ind in df.index:
  if df["Condition"][ind]=='Sunny':
    df["Condition"][ind] = 0
  elif df["Condition"][ind]=='Partly cloudy':
    df["Condition"][ind] = 1
  elif df["Condition"][ind]=='Cloudy':
    df["Condition"][ind] = 2
  elif df["Condition"][ind]=='Overcast':
    df["Condition"][ind] = 3
  elif df["Condition"][ind]=='Patchy rain possible':
    df["Condition"][ind] = 4
  else:
    df = df.drop([ind]) #we don't consider other classes so we drop it

#Cast this column to int
df["Condition"] = df["Condition"].astype(int)

df.head() #show the 5 first rows of the dataframe

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://

Unnamed: 0,Condition,MinTemp,AvgHumidity,AvgPressure
0,1,22,66,1012
1,1,22,66,1011
2,4,22,63,1015
3,1,22,64,1016
4,1,23,61,1015


In [48]:
from keras.utils import to_categorical

labels = to_categorical(df.pop('Condition')) #Create classes from the labels

import numpy as np #import numpy library, used for arithmetic

features = np.array(df) #convert our dataframe into ndarray, only array type that neural network takes as input

In [49]:
features

array([[  22,   66, 1012],
       [  22,   66, 1011],
       [  22,   63, 1015],
       ...,
       [   3,   94, 1021],
       [   1,   89, 1021],
       [   2,   90, 1027]])

In [50]:
labels

array([[0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       ...,
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0.]], dtype=float32)

In [51]:
from sklearn.model_selection import train_test_split


#Split the dataset into training set 85% and test set 15%
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.15,shuffle=True) 

## Model

In [None]:
import tensorflow as tf

### Building the model

In [52]:
#Parameters :
NB_classes = 5 #number of outputs
NB_neurones = 30 #main number of neurones
NB_features = 3 #number of inputs
activation_func = tf.keras.activations.relu #activation function used

#Densly connected neural network
model = tf.keras.Sequential([
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func,input_shape=(NB_features,)),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dropout(0.4), #drop randomly some connection to avoid overfiting
                             #softmax will output an array containing probabilities of each classes
                             #the highest one is the predicted class
                             tf.keras.layers.Dense(NB_classes,activation=tf.keras.activations.softmax)
])

model.compile(optimizer="adam",loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy']) #compile the model

model.summary() #to see the paramter of our model


Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_195 (Dense)            (None, 30)                120       
_________________________________________________________________
dense_196 (Dense)            (None, 30)                930       
_________________________________________________________________
dense_197 (Dense)            (None, 30)                930       
_________________________________________________________________
dense_198 (Dense)            (None, 30)                930       
_________________________________________________________________
dense_199 (Dense)            (None, 30)                930       
_________________________________________________________________
dense_200 (Dense)            (None, 30)                930       
_________________________________________________________________
dense_201 (Dense)            (None, 30)              

### Training the model

In [53]:
model.fit(x=train_features,
          y=train_labels,
          epochs=20,
          validation_data=(test_features,test_labels),
          verbose=1,
          shuffle=True) #Train our model

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f7053a8af10>

In [54]:
performance=model.evaluate(test_features,test_labels, batch_size=32, verbose=1, steps=None, )[1] * 100
print('Final accuracy : ', round(performance), '%')

Final accuracy :  60 %


## Converting the model

### Tensorflow lite

In [57]:
converter = tf.lite.TFLiteConverter.from_keras_model(model) #create a converter
tflite_model = converter.convert() #convert the model without quantization (for one of the next video)


open("/content/tflite_model.tflite","wb").write(tflite_model) #Create a file containing our tflite model

INFO:tensorflow:Assets written to: /tmp/tmpp68jbr0q/assets


INFO:tensorflow:Assets written to: /tmp/tmpp68jbr0q/assets


101796

### Header (c++) file

In [None]:
!apt-get install -qq xxd #installing the tool

Selecting previously unselected package xxd.
(Reading database ... 160980 files and directories currently installed.)
Preparing to unpack .../xxd_2%3a8.0.1453-1ubuntu1.4_amd64.deb ...
Unpacking xxd (2:8.0.1453-1ubuntu1.4) ...
Setting up xxd (2:8.0.1453-1ubuntu1.4) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...


In [56]:
!echo "const unsigned char model[] = {" > /content/model.h
!cat /content/tflite_model.tflite | xxd -i >> /content/model.h #create an hexadecimal array containing all our parameters
!echo "};" >> /content/model.h

files.download("/content/model.h") #automaticly download your file

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>