In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime

In [2]:
# Load the rainfall CSV from the Colab working directory (/content) into `df`.
# NOTE(review): the absolute path only resolves inside a Colab session where
# the file has been uploaded — confirm before running elsewhere.
df = pd.read_csv("/content/Rainfall_Data_LL.csv")

In [3]:
# Keep only the 'Madhya Maharashtra' rows and column positions 2..15.
# NOTE(review): the slice is assumed to hold YEAR, twelve month columns and one
# trailing aggregate column — confirm against the CSV header.
df1 = df.loc[df['SUBDIVISION'] == 'Madhya Maharashtra'].iloc[:, 2:16]

# Wide -> long: one row per (year, month). columns[1:-1] skips the leading
# YEAR column and the trailing column, leaving the month columns as values.
# Columns are renamed by name rather than by position so a change in the
# melt output order cannot silently mislabel them.
df2 = (
    pd.melt(df1, id_vars='YEAR', value_vars=df1.columns[1:-1])
    .rename(columns={'YEAR': 'Year', 'variable': 'Month', 'value': 'Rainfall'})
)

# Build a real datetime64 column in one vectorised call. The previous
# row-wise strptime written back through `.loc[:, 'Date'] =` can leave the
# column as object dtype on recent pandas versions.
df2['Date'] = pd.to_datetime(
    df2['Month'] + ' ' + df2['Year'].astype(str), format='%b %Y'
)

# Chronological order; rebinding instead of inplace=True keeps the cell
# safe to re-run.
df2 = df2.sort_values(by='Date')

# Date-indexed frame holding only the Rainfall column, used for modelling.
df3 = df2.drop(columns=['Year', 'Month']).set_index('Date')

In [4]:
# Interactive line chart of the whole rainfall series.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df2['Date'], y=df2['Rainfall']))

# Single layout update: title, axis labels, fixed y-range and the x-axis
# range controls. rangeselector, rangeslider and type are all direct
# properties of the x-axis.
fig.update_layout(
    title='Rainfall Throughout Timeline:',
    xaxis=dict(
        title='Time',
        type='date',
        rangeselector=dict(
            buttons=[
                dict(label='Whole View', step='all'),
                dict(count=1, label='One Year View', step='year', stepmode='todate'),
            ]
        ),
        rangeslider=dict(visible=True),
    ),
    # Pin the y-axis from 0 to just above the largest observation.
    yaxis=dict(title='Rainfall in mm', range=[0, df2['Rainfall'].max() + 1]),
)
fig.show()

In [5]:
# Box plot of rainfall grouped by month, one box per value of df2['Month'].
fig = px.box(data_frame=df2, x='Month', y='Rainfall')
fig.update_layout(title='Minimum, Maximum and Median Monthly Rainfall.')
fig.show()

In [6]:
# One panel per month (four panels per row), each plotting that month's
# rainfall against the year.
fig = px.line(df2, x='Year', y='Rainfall', facet_col='Month', facet_col_wrap=4)
fig.update_layout(title='Monthly rainfall throughout history:')
fig.show()

In [7]:
pip install pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pmdarima
Successfully installed pmdarima-2.0.4


In [8]:
from pmdarima import auto_arima
# Hold-out split by position: the last 90 rows of df3 become `test`, every
# earlier row goes to `train`.
# NOTE(review): neither frame is referenced by the other cells visible here;
# the model is fitted on all of df3 — confirm whether the split is still needed.
train, test = df3.iloc[:-90], df3.iloc[-90:]

In [9]:
# Fit via auto_arima on the complete Rainfall column of df3, with m=12
# passed as the seasonal period.
model2 = auto_arima(y=df3['Rainfall'], m=12)

In [10]:
# Forecast the next 12 periods with the fitted model and wrap the result in
# a pandas Series.
predictions3 = pd.Series(model2.predict(n_periods = 12))

In [11]:
# Display the 12-period forecast as the cell output.
predictions3

2018-01-01      6.359538
2018-02-01      7.962404
2018-03-01     16.135652
2018-04-01     10.282179
2018-05-01     19.218089
2018-06-01    133.203245
2018-07-01    236.990507
2018-08-01    178.230102
2018-09-01    156.734497
2018-10-01     68.563454
2018-11-01     19.368133
2018-12-01      9.221024
Freq: MS, dtype: float64

In [12]:
from google.colab import drive
# Mount Google Drive at /content/drive.
# NOTE(review): no cell visible here reads from or writes to the mounted
# drive — confirm whether this mount is still needed.
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
pip install bz2file

Collecting bz2file
  Downloading bz2file-0.98.tar.gz (11 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: bz2file
  Building wheel for bz2file (setup.py) ... done
  Created wheel for bz2file: filename=bz2file-0.98-py3-none-any.whl size=6868 sha256=fe39a59d124c335200f725fa74e68274a4025385adb6e354bcc247df4af3cf94
  Stored in directory: /root/.cache/pip/wheels/69/ee/f7/6fccd10cb65421ba2da64fa6caf8ee7fbae0059884af8c8587
Successfully built bz2file
Installing collected packages: bz2file
Successfully installed bz2file-0.98


In [14]:
import bz2file as bz2
import pickle

In [15]:
def compressed_pickle(title, data):
    """Pickle ``data`` into a bz2-compressed file.

    Parameters
    ----------
    title : str
        Output path without extension; ``'.pbz2'`` is appended to it.
    data : object
        Any picklable object to serialise.

    Returns
    -------
    None
        The function only writes the file.
    """
    target_path = title + '.pbz2'
    # The context manager closes the compressed stream once the dump is done.
    with bz2.BZ2File(target_path, 'w') as sink:
        pickle.dump(data, sink)

In [16]:
# Persist the fitted model as 'model2.pbz2' (the helper appends the extension).
compressed_pickle('model2', model2)

In [17]:
def decompress_pickle(file):
    """Load and return the object stored in a bz2-compressed pickle file.

    Parameters
    ----------
    file : str or path-like
        Path to the compressed pickle (e.g. one written with a ``.pbz2``
        extension).

    Returns
    -------
    object
        The unpickled object.

    Notes
    -----
    Unpickling can execute arbitrary code, so only load files from a
    trusted source.
    """
    # The context manager closes the underlying file handle even if
    # unpickling raises; previously the handle was never closed.
    with bz2.BZ2File(file, 'rb') as f:
        return pickle.load(f)

In [18]:
# Reload the model from the compressed pickle written earlier.
model = decompress_pickle('model2.pbz2')

In [19]:
# Forecast 12 periods with the reloaded model. The recorded output matches
# the forecast from the in-memory model2, which suggests the save/load
# round trip preserved the model.
model.predict(n_periods = 12)

2018-01-01      6.359538
2018-02-01      7.962404
2018-03-01     16.135652
2018-04-01     10.282179
2018-05-01     19.218089
2018-06-01    133.203245
2018-07-01    236.990507
2018-08-01    178.230102
2018-09-01    156.734497
2018-10-01     68.563454
2018-11-01     19.368133
2018-12-01      9.221024
Freq: MS, dtype: float64