# **LIBRARIES AND FILE SETUP**

In [None]:
pip install tsai[extras]

In [None]:
!git clone https://github.com/fastai/fastai
!pip install -e "fastai[dev]"

In [None]:
# **************** UNCOMMENT AND RUN THIS CELL IF YOU NEED TO INSTALL/ UPGRADE TSAI ****************
stable = False # Set to True for latest pip version or False for main branch in GitHub
!pip install {"tsai -U" if stable else "git+https://github.com/timeseriesAI/tsai.git"} >> /dev/null

In [None]:
import tsai

In [None]:
from tsai.all import *

In [None]:
import fastai

In [None]:
from fastai import *
from tsai import *

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [None]:
import os
os.listdir()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Export list naming time-series data-preparation helpers.
# None of these names is defined in the visible cells of this notebook; of the
# names listed, only `df2np3d` is called in the visible cells.
# NOTE(review): this looks copied from a library module and presumably has no
# effect inside a notebook — confirm and remove if so.
__all__ = ['df2xy', 'split_xy', 'SlidingWindowSplitter', 'SlidingWindowPanelSplitter', 'prepare_idxs',
           'prepare_sel_vars_and_steps', 'apply_sliding_window', 'df2Xy', 'split_Xy', 'df2np3d',
           'add_missing_value_cols', 'add_missing_timestamps', 'time_encoding', 'forward_gaps', 'backward_gaps',
           'nearest_gaps', 'get_gaps', 'add_delta_timestamp_cols', 'SlidingWindow', 'SlidingWindowPanel',
           'identify_padding']

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
### Project folder on the mounted Google Drive (the same folder the next cell changes into)

path = r'/content/drive/MyDrive/ColabNotebooks'

In [None]:
cd /content/drive/MyDrive/ColabNotebooks

# **IMPORT PADDED BATCH FILE**

In [None]:

#### PADDED DF FROM GOOGLE DRIVE
### -------------------------------------------------------------------------------

### Load the padded batch file into `padded_df`.
### NOTE(review): this cell was labelled "CHEVY BOLT", but the file read below lives in the
### Hyundai_Kona_Electric folder — confirm which vehicle dataset is intended.
padded_df = pd.read_csv("/content/drive/MyDrive/ColabNotebooks/Hyundai_Kona_Electric/padded.csv")


In [None]:
#### ANALYSIS
### -------------------------------------------------------------------------------

padded_df.head()

In [None]:

### CHECK COL NAMES
### -------------------------------------------------------------------------------

padded_df.columns

In [None]:

### BUILD y FOR THE PLOT OF DROP OFF LOCATIONS IN CHICAGO
### (one value per batch: the "dropoff" entry of the batch's last row)
### -------------------------------------------------------------------------------

y = padded_df.groupby("batch_id")["dropoff"].last().values

In [None]:

### BAR CHART: HOW MANY BATCHES END AT EACH DROP OFF LOCATION
### -------------------------------------------------------------------------------

plt.figure(figsize=(15, 10))
pd.DataFrame(y)[0].value_counts().plot.bar()
plt.title("Distribution of Dropoff batches")

In [None]:

### CHECK COL NAMES
### -------------------------------------------------------------------------------

padded_df.columns

In [None]:

### COLUMNS PASSED TO df2np3d AS THE PER-TIMESTEP VARIABLES
### -------------------------------------------------------------------------------
### NOTE(review): 'dropoff' is listed here as an input column and is also the column the
### labels are taken from further down (last 'dropoff' per batch); 'batch_id' is listed as
### an input too. Confirm the final row is masked in the padded file, otherwise this may
### leak the label into the model input.

data_cols = [
    'range_km', '20_battery', '30_battery', '40_battery',
    'battery_Wh', 'Wh_Km', 'trip_km',
    'pickup', 'dropoff',
    'start_daymonth', 'start_dayday', 'start_dayhour',
    'end_daymonth', 'end_dayday', 'end_dayhour',
    'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8',
    'batch_id', 'count',
]

In [None]:

### CREATE PADDED DF BY BATCH ID
### -------------------------------------------------------------------------------

padded_df = padded_df.groupby('batch_id')

In [None]:
padded_df.to_csv("/content/drive/MyDrive/ColabNotebooks/Chevy_Bolt/model_ver1.csv", index=False)

# Experiment 1:20%

In [None]:
padded_df = pd.read_csv("/content/drive/MyDrive/ColabNotebooks/Chevy_Bolt/model_ver1.csv")

In [None]:

### ANALYSIS
### -------------------------------------------------------------------------------

padded_df.head()

In [None]:

### CREATE COMBINED_DF VARIABLE
### -------------------------------------------------------------------------------

# `combined_df` must be a DataFrame: the cells below read `.columns`, call
# `.groupby("batch_id")` on it and filter it with a boolean `isin` mask, none of which
# work on the DataFrameGroupBy object that `padded_df.groupby(...)` returns.
# Rows are ordered by batch with a stable sort, so the order inside each batch is kept.
combined_df = padded_df.sort_values("batch_id", kind="mergesort").reset_index(drop=True)

In [None]:
combined_df.columns

In [None]:

### ANALYSIS
### -------------------------------------------------------------------------------

combined_df["batch_id"].value_counts()

In [None]:

### ANALYSIS
### -------------------------------------------------------------------------------

combined_df["batch_id"].unique()

In [None]:

### BUILD cv: ONE ROW PER BATCH WITH COLUMNS batch_id AND dropoff
### (dropoff is taken from the last row of each batch)
### -------------------------------------------------------------------------------

cv = combined_df.groupby("batch_id")["dropoff"].last().reset_index()

cv

In [None]:

### ANALYSIS
### -------------------------------------------------------------------------------

cv["dropoff"].value_counts()

In [None]:

### SPLIT THE BATCH IDS INTO TRAINING AND TEST SETS
### The split is made per batch (one row of cv per batch), with 15% of batches held out.
### -------------------------------------------------------------------------------

X_train, X_val, y_train, y_val = train_test_split(
    cv[["batch_id"]],
    cv["dropoff"],
    test_size=0.15,
    random_state=123,
)

In [None]:

### BATCH IDS BELONGING TO THE TRAINING AND TEST SETS
### -------------------------------------------------------------------------------

train_batch_ids = X_train["batch_id"].unique()
test_batch_ids = X_val["batch_id"].unique()

# Displayed as (number of test batches, number of training batches).
len(test_batch_ids), len(train_batch_ids)

In [None]:

### CREATE TRAINING AND TESTING DATAFRAMES FROM COMBINED DF BATCH IDS
### Every row of a batch goes to the side its batch id was assigned to.
### -------------------------------------------------------------------------------

train_df = combined_df.loc[combined_df["batch_id"].isin(train_batch_ids)]
test_df = combined_df.loc[combined_df["batch_id"].isin(test_batch_ids)]

In [None]:

### ANALYSIS
### -------------------------------------------------------------------------------

train_df.shape, test_df.shape 

In [None]:

### LABEL ARRAYS: LAST "dropoff" VALUE OF EVERY BATCH IN TRAIN AND TEST DF
### -------------------------------------------------------------------------------

train_y = train_df.groupby("batch_id")["dropoff"].last().values
test_y = test_df.groupby("batch_id")["dropoff"].last().values

In [None]:

### TRAINING - CREATE RES VARIABLE FOR MODEL
### -------------------------------------------------------------------------------

res = df2np3d(train_df, ['batch_id'], data_cols=data_cols )

In [None]:

### TESTING - CREATE RES VARIABLE FOR MODELLING
### -------------------------------------------------------------------------------

test_res = df2np3d(test_df, ['batch_id'], data_cols=data_cols )

In [None]:
test_df.shape

In [None]:
test_df.head()

In [None]:

### SPLIT TRAINING AND VALIDATION DATA
### -------------------------------------------------------------------------------

# Split the training labels into train/validation index sets: 15% validation,
# stratified and shuffled, with a fixed seed (23).
# NOTE(review): presumably the returned indices refer to positions in `train_y`
# (and therefore in `res`) — confirm against the tsai get_splits documentation.
splits = get_splits(train_y, valid_size=.15, stratify=True, random_state=23, shuffle=True)
splits

In [None]:
### PREVIEW EVERY TIME-SERIES-TO-IMAGE TRANSFORM AVAILABLE FOR THE CNN MODEL
### -------------------------------------------------------------------------------

# No transform on the inputs; labels are categorised.
tfms = [None, Categorize()]
# One batch-transform pipeline per image encoding; `btns` holds the matching display names.
bts = [[TSNormalize(), TSToPlot()],
       [TSNormalize(), TSToMat(cmap='viridis')],
       [TSNormalize(), TSToGADF(cmap='spring')],
       [TSNormalize(), TSToGASF(cmap='summer')],
       [TSNormalize(), TSToMTF(cmap='autumn')],
       [TSNormalize(), TSToRP(cmap='winter')]]
btns = ['Plot', 'Mat', 'GADF', 'GASF', 'MTF', 'RP']
for bt, btn in zip(bts, btns):
    # Labels are `train_y`, built from the same train_df as `res`. The name used here
    # before (`mod_y`) is not defined in the visible notebook and raised a NameError on a fresh kernel.
    dsets = TSDatasets(res, train_y, tfms=tfms, splits=splits)
    dls = TSDataLoaders.from_dsets(dsets.train,
                                   dsets.valid,
                                   bs=[64, 128],
                                   batch_tfms=bt,
                                   shuffle=False)
    # Show the first sample of one training batch for this encoding.
    xb, yb = dls.train.one_batch()
    print(f'\n\ntfm: TSTo{btn} - batch shape: {xb.shape}')
    xb[0].show()
    plt.show()
# Note: `dsets` and `dls` are left bound to the last encoding (RP); the cells below rebuild both.

In [None]:

### CREATE TENSORS FOR MODELLING
### -------------------------------------------------------------------------------

# Inputs get no item transform; labels are categorised.
tfms  = [None, [Categorize()]]
# Build the datasets from the training array and labels, using the train/validation
# indices in `splits`.
# NOTE(review): `inplace=True` presumably lets tsai apply transforms to the stored data
# directly — confirm against the TSDatasets documentation.
dsets = TSDatasets(res, train_y, tfms=tfms, splits=splits, inplace=True, )
dsets

In [None]:

### TEST VARIABLE FOR MODELLING
### -------------------------------------------------------------------------------

test_ds = dsets.add_test(X= test_res)

In [None]:

### NEEDED FOR MODELLING TEST RESULTS
### -------------------------------------------------------------------------------

test_dl = TSDataLoaders.from_dsets(test_ds, batch_tfms=[TSStandardize()])

In [None]:

### NEEDED FOR THE BILSTM MODEL!
### TRAINING DATA LOADER FOR MODEL
### -------------------------------------------------------------------------------

# Build train/validation loaders with TSStandardize as the only batch transform.
# The same statement is repeated verbatim at the start of the BiLSTM section below.
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, batch_tfms=[TSStandardize()], num_workers=0)

In [None]:

### TEST DATALOADER
### -------------------------------------------------------------------------------

# Store whatever `add_test` returns for the held-out array on `dls` as the attribute `test`.
# NOTE(review): an earlier cell calls `add_test` on `dsets`; here it is called on the
# DataLoaders object — confirm that TSDataLoaders exposes `add_test`.
dls.test = dls.add_test(X= test_res)

In [None]:

### PLOT GRAPHS FROM TRAINING DATA
### -------------------------------------------------------------------------------

dls.show_batch(sharey=True)

# **BiLSTM - TRAINING AND VALIDATION**

In [None]:
### NEED FOR BILSTM MODE !
### TRAINING DATA LOADER FOR MODEL
### -------------------------------------------------------------------------------

dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, batch_tfms=[TSStandardize()], num_workers=0)

In [None]:
### CREATE MODEL VARIABLE FOR BILSTM MODELLING
### -------------------------------------------------------------------------------

model = LSTM(dls.vars, dls.c,n_layers=2, bidirectional=True)
model

In [None]:
### BILSTM MODELLING - CREATE LEARN VARIABLE FOR FUTURE MODELS
### -------------------------------------------------------------------------------

# Wrap the loaders and the model in a Learner that reports accuracy and uses a
# flattened cross-entropy loss.
learn = Learner(dls, model, metrics=accuracy,  loss_func=CrossEntropyLossFlat())
# Time the fit. NOTE(review): `time` is never imported explicitly in this notebook; it is
# presumably brought in by `from tsai.all import *` — confirm.
start = time.time()
# One-cycle schedule, 10 epochs, peak learning rate 1e-3.
learn.fit_one_cycle(10, lr_max=1e-3)
print(f"\ntraining time: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}")
learn.plot_metrics()

In [None]:
### BILSTM MODELLING - CONTINUE TRAINING THE LEARN VARIABLE FROM THE PREVIOUS CELL
### -------------------------------------------------------------------------------

# Restart the timer in this cell. Reusing `start` from the previous cell added that cell's
# training time, plus any idle time between the two runs, to the figure printed below.
start = time.time()
# One-cycle schedule, 100 more epochs, peak learning rate 1e-3.
learn.fit_one_cycle(100, lr_max=1e-3)
print(f"\ntraining time: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}")
learn.plot_metrics()

# **TEST DATA RESULTS**

In [None]:
### Import model learner
# This rebinds `learn`, replacing the BiLSTM learner trained in the section above.
# NOTE(review): the file loaded here is written by a `learn.export(...)` call that appears
# later in this notebook (pre-trained XResNet18 section), so on a fresh top-to-bottom run
# it must already exist from an earlier session — confirm this ordering is intended.

learn = load_learner('/content/drive/MyDrive/ColabNotebooks/Hyundai_Kona_Electric/pre_trained_learner.pkl')

In [None]:
op = learn.get_X_preds(test_res, with_decoded=True)

In [None]:
import ast
y_pred = ast.literal_eval(op[2])

In [None]:
y_pred[:30]

In [None]:
# Convert every decoded prediction to int.
# A comprehension keeps its loop variable local; the previous `for y in ...` loop rebound
# the notebook-level `y` (the drop-off label array built near the top) to the last prediction.
y_pred_new = [int(pred) for pred in y_pred]

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
test_y_new = [int(y) for y in test_y]

In [None]:
### PRETRAINED MODEL
### Pretrained model 100 - 20% TEST

accuracy_score(test_y_new, y_pred_new)

In [None]:
### NOT PRETRAINED MODEL
### Not Pretrained model 100 - 20% TEST

accuracy_score(test_y_new, y_pred_new)

# **30% BATTERY CAPACITY PREDICTION**

In [None]:
### LOAD LEARNER FROM DISK
### NOTE(review): this cell was headed "PRE TRAINED LEARNER" but the file loaded is
### not_pretrained_learner.pkl, and it is read from the Chevy_Bolt folder whereas the export
### cells in this notebook write to Hyundai_Kona_Electric — confirm which file is intended.
### -----------------------------------------------------

learn = load_learner('/content/drive/MyDrive/ColabNotebooks/Chevy_Bolt/not_pretrained_learner.pkl')

In [None]:
### LOAD TEST DATA

mask_df = pd.read_csv("/content/drive/MyDrive/ColabNotebooks/Chevy_Bolt/mask100.csv") 

In [None]:
mask_df.columns

In [None]:
mask_df.shape

In [None]:
### Remove the 30% and 40% battery columns from mask_df (modifies mask_df in place,
### so running this cell a second time raises a KeyError)
### -----------------------------------------------------------

to_remove = ["30_battery", "40_battery"]
mask_df.drop(columns=to_remove, inplace=True)

In [None]:
### NEEDED TO SPLIT DATA FOR TEST PERCENTAGE
### One row per batch: batch_id plus the "dropoff" value of the batch's last row.

cv = mask_df.groupby("batch_id")["dropoff"].last().reset_index()

In [None]:
### NEED TO SPLIT DATA FOR TEST PERCENTAGE

X_train, X_val, y_train, y_val = train_test_split(cv[["batch_id"]], cv["dropoff"], test_size=0.15, random_state=123)

In [None]:
### NEED TO SPLIT DATA FOR TEST PERCENTAGE

test_batch_ids = X_val["batch_id"].unique()

In [None]:
### NEEDED TO SPLIT DATA FOR TEST PERCENTAGE
### Keep only the rows whose batch belongs to the held-out test batches.

test_df = mask_df.loc[mask_df["batch_id"].isin(test_batch_ids)]

In [None]:
test_df.shape

In [None]:
y = test_df.groupby("batch_id").last()["dropoff"].values

In [None]:
# Distinct drop-off values in the test batches, ordered as value_counts() returns them.
# To keep only the last 20 entries of that ordering, slice with `.iloc[-20:]` before `.index`.

low_val = pd.DataFrame(y)[0].value_counts().index

In [None]:
### CHECK COLUMNS IN DF

test_df.columns

In [None]:
test_df["batch_id"].value_counts()

In [None]:
### COLUMNS PASSED TO df2np3d FOR THE MASKED TEST DATA
### NOTE(review): an earlier cell in this section drops '30_battery' and '40_battery' from
### mask_df in place, yet both are still listed here; selecting them presumably raises a
### KeyError in df2np3d. This list also has 27 entries where the list defined in the
### training section has 25 — confirm the learner expects this many variables.

data_cols = [
    'range_km',
    '20_battery', '30_battery', '40_battery', '70_battery', '100_battery',
    'battery_Wh', 'Wh_Km', 'trip_km',
    'pickup', 'dropoff',
    'start_daymonth', 'start_dayday', 'start_dayhour',
    'end_daymonth', 'end_dayday', 'end_dayhour',
    'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8',
    'batch_id', 'count',
]

In [None]:
# `mask_30` has to stay a DataFrame: the next cells call `.groupby("batch_id")` and `.shape`
# on it and pass it to df2np3d, none of which work on the DataFrameGroupBy object that
# `test_df.groupby(...)` returns. The stable sort keeps the row order inside each batch.
mask_30 = test_df.sort_values('batch_id', kind='mergesort').reset_index(drop=True)

In [None]:
test_30_y = mask_30.groupby("batch_id").last()["dropoff"].values

In [None]:
mask_30.shape

In [None]:
test_res = df2np3d(mask_30, ['batch_id'], data_cols=data_cols )

In [None]:
op = learn.get_X_preds(test_res, with_decoded=True)

In [None]:
import ast
y_pred = ast.literal_eval(op[2])

In [None]:
# Convert every decoded prediction to int.
# A comprehension keeps its loop variable local; the previous `for y in ...` loop rebound
# the notebook-level `y` (the test drop-off array built earlier in this section).
y_pred_new = [int(pred) for pred in y_pred]

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
test_30_y_new = [int(y) for y in test_30_y]

In [None]:
len(test_30_y_new), len(y_pred_new)

In [None]:
### PRE TRAINED LEARNER
### 15% TEST DATASET 

accuracy_score(test_30_y_new, y_pred_new)

In [None]:
### NOT PRE TRAINED LEARNER
### 15% TEST DATA

accuracy_score(test_30_y_new, y_pred_new)

# **40 % BATTERY CAPACITY PREDICTION**

In [None]:
### Import model learner

learn = load_learner('/content/drive/MyDrive/ColabNotebooks/Hyundai_Kona_Electric/pre_trained_learner.pkl')

In [None]:
### LOAD CSV FILE AS DF

mask_40_df = pd.read_csv("/content/drive/MyDrive/ColabNotebooks/Hyundai_Kona_Electric/mask40.csv")

In [None]:
mask_40_df.shape

In [None]:
### NEED TO SPLIT DATA FOR TEST PERCENTAGE

cv  = pd.DataFrame(mask_40_df.groupby("batch_id").last()["dropoff"])
cv.reset_index(inplace=True)

In [None]:
### NEED TO SPLIT DATA FOR TEST PERCENTAGE

X_train, X_val, y_train, y_val = train_test_split(cv[["batch_id"]], cv["dropoff"], test_size=0.15, random_state=123)

In [None]:
### NEED TO SPLIT DATA FOR TEST PERCENTAGE

test_batch_ids = X_val["batch_id"].unique()

In [None]:
### NEED TO SPLIT DATA FOR TEST PERCENTAGE

test_df = mask_40_df[mask_40_df["batch_id"].isin(test_batch_ids)]

In [None]:
### DO

y = test_df.groupby("batch_id").last()["dropoff"].values

In [None]:
test_df.columns

In [None]:
### MAY NOT NEED ALREADY DONE

data_cols = ['range_km', '20_battery', '30_battery', '40_battery', 'battery_Wh',
       'Wh_Km', 'trip_km', 'pickup', 'dropoff', 'start_daymonth',
       'start_dayday', 'start_dayhour', 'end_daymonth', 'end_dayday',
       'end_dayhour', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8',
       'batch_id', 'count']

In [None]:
# `mask_40` has to stay a DataFrame because it is passed to df2np3d below; the
# DataFrameGroupBy object returned by `test_df.groupby(...)` is not one.
# The stable sort keeps the row order inside each batch.
mask_40 = test_df.sort_values('batch_id', kind='mergesort').reset_index(drop=True)

In [None]:
test_40_y = test_df.groupby("batch_id").last()["dropoff"].values

In [None]:
test_res = df2np3d(mask_40, ['batch_id'], data_cols=data_cols )

In [None]:
op = learn.get_X_preds(test_res, with_decoded=True)

In [None]:
import ast
y_pred = ast.literal_eval(op[2])

In [None]:
# Convert every decoded prediction to int.
# A comprehension keeps its loop variable local; the previous `for y in ...` loop rebound
# the notebook-level `y` (the test drop-off array built earlier in this section).
y_pred_new = [int(pred) for pred in y_pred]

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
test_40_y_new = [int(y) for y in test_40_y]

In [None]:
len(test_40_y_new), len(y_pred_new)

In [None]:
### PRE TRAINED LEARNER
### 15% DATASET @ 40% MASKED

accuracy_score(test_40_y_new, y_pred_new)

In [None]:
### NOT PRE TRAINED LEARNER
### 15% DATASET @ 40% MASKED

accuracy_score(test_40_y_new, y_pred_new)

# **CNN MODELLING**

# **Pre-trained Version - XResNet18**

In [None]:
### USE FOR MODELLING CNN
### -------------------------------------------------------------------------------

dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, batch_tfms=[TSNormalize(), TSToMTF(cmap='autumn')], num_workers=0)

In [None]:
### USE THIS FOR BEST LOOKING PLOTS USING NEXT CELL
### -------------------------------------------------------------------------------

# This rebinds `dls`, replacing the TSToMTF loaders created in the previous cell.
# NOTE(review): the model cells further down use whichever `dls` was assigned last, so after
# running this cell the CNN is built and trained on TSToPlot images — confirm that is intended.
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, batch_tfms=[TSNormalize(), TSToPlot()], num_workers=0)

In [None]:
### RUN TO CREATE BATCH PLOTS
### -------------------------------------------------------------------------------

dls.show_batch()

In [None]:
### CNN PRE-TRAINED MODEL AND LEARN VARIABLES
### -------------------------------------------------------------------------------

model = create_model(xresnet18, dls=dls, pretrained=True)
learn = Learner(dls, model, metrics=accuracy)

In [None]:
### CNN PRE-TRAINED MODELLING
### -------------------------------------------------------------------------------

start = time.time()
learn.fit_one_cycle(10, lr_max=1e-3)
print(f"\ntraining time: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}")
learn.plot_metrics()

In [None]:
### CNN PRE-TRAINED MODELLING - CONTINUE TRAINING
### -------------------------------------------------------------------------------

# Restart the timer in this cell. Reusing `start` from the previous cell added that cell's
# training time, plus any idle time between the two runs, to the figure printed below.
start = time.time()
# One-cycle schedule, 10 more epochs, peak learning rate 1e-3.
learn.fit_one_cycle(10, lr_max=1e-3)
print(f"\ntraining time: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}")
learn.plot_metrics()

In [None]:
### Export model learner 
### -------------------------------------------------------------------------------

learn.export('/content/drive/MyDrive/ColabNotebooks/Hyundai_Kona_Electric/pre_trained_learner.pkl')

# **NOT Pre-trained Version - XResNet18**

In [None]:
### MODEL AND LEARN VARIABLES FOR MODEL
### -------------------------------------------------------------------------------

model = create_model(xresnet18, dls=dls, pretrained=False)
learn = Learner(dls, model, metrics=accuracy)

In [None]:
### MODELLING - NOT PRE-TRAINED XRESNET18
### -------------------------------------------------------------------------------

# Start a fresh timer. Without this, `start` still held the value set while training the
# pre-trained model, so the printed time covered far more than this fit.
start = time.time()
# One-cycle schedule, 15 epochs, peak learning rate 1e-3.
learn.fit_one_cycle(15, lr_max=1e-3)
print(f"\ntraining time: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}")
learn.plot_metrics()

In [None]:
### MODELLING - NOT PRE-TRAINED XRESNET18, CONTINUE TRAINING
### -------------------------------------------------------------------------------

# Restart the timer in this cell so the printed time covers only this fit, not the
# earlier training runs that last set `start`.
start = time.time()
# One-cycle schedule, 20 more epochs, peak learning rate 1e-3.
learn.fit_one_cycle(20, lr_max=1e-3)
print(f"\ntraining time: {time.strftime('%H:%M:%S', time.gmtime(time.time() - start))}")
learn.plot_metrics()

In [None]:
### Export model learner 
### -------------------------------------------------------------------------------

learn.export('/content/drive/MyDrive/ColabNotebooks/Hyundai_Kona_Electric/not_pretrained_learner.pkl')