In [1]:
import pandas as pd

Loading and Exploring the Dataset

In [2]:
rainfall_data = pd.read_csv('rainfall_area-wt_India_1901-2015.csv')

In [3]:
rainfall_data.head()

Unnamed: 0,REGION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
0,INDIA,1901,34.7,37.7,18.0,39.3,50.8,113.4,242.2,272.9,124.4,52.7,38.0,8.3,1032.3,72.4,108.1,752.8,99.0
1,INDIA,1902,7.4,4.3,19.0,43.5,48.3,108.8,284.0,199.7,201.5,61.5,27.9,24.4,1030.2,11.7,110.8,794.0,113.8
2,INDIA,1903,17.0,8.3,31.3,17.1,59.5,118.3,297.0,270.4,199.1,117.9,36.9,17.7,1190.5,25.3,107.9,884.8,172.5
3,INDIA,1904,14.4,9.6,31.8,33.1,72.4,164.8,261.0,206.4,129.6,69.0,11.2,16.3,1019.8,24.0,137.4,761.8,96.6
4,INDIA,1905,25.3,20.9,42.7,33.7,55.7,93.3,252.8,200.8,178.4,51.4,9.7,10.5,975.3,46.2,132.2,725.4,71.6


Analyzing Annual Rainfall Trends Over Time

In [4]:
import plotly.graph_objects as go
import plotly.express as px

# analyze trends in annual rainfall over time
annual_rainfall = rainfall_data[['YEAR', 'ANNUAL']]

fig_annual = go.Figure()
fig_annual.add_trace(go.Scatter(
    x=annual_rainfall['YEAR'],
    y=annual_rainfall['ANNUAL'],
    mode='lines',
    name='Annual Rainfall',
    line=dict(color='blue', width=2),
    opacity=0.7
))

fig_annual.add_trace(go.Scatter(
    x=annual_rainfall['YEAR'],
    y=[annual_rainfall['ANNUAL'].mean()] * len(annual_rainfall),
    mode='lines',
    name='Mean Rainfall',
    line=dict(color='red', dash='dash')
))

fig_annual.update_layout(
    title='Trend in Annual Rainfall in India (1901-2015)',
    xaxis_title='Year',
    yaxis_title='Rainfall (mm)',
    template='plotly_white',
    legend=dict(title='Legend'),
    height=500
)
fig_annual

In [10]:
# identify months with the highest and lowest rainfall on average
monthly_columns = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG',
                    'OCT', 'NOV', 'DEC']
monthly_avg = rainfall_data[monthly_columns].mean()

highest_rainfall_month = monthly_avg.idxmax()
lowest_raingall_month  = monthly_avg.idxmin()
monthly_avg

JAN     19.759130
FEB     23.434783
MAR     28.254783
APR     38.241739
MAY     62.193913
JUN    168.360000
JUL    291.022609
AUG    258.400870
OCT     75.701739
NOV     29.205217
DEC     14.980000
dtype: float64

In [17]:
fig_monthly = px.bar(
    x=monthly_avg.index,
    y=monthly_avg.values,
    labels={'x': 'Month', 'y': 'Rainfall (mm)'},
    title='Average Monthly Rainfall in India (1901-2015)',
    text=monthly_avg.values
)
fig_monthly.add_hline(
    y=monthly_avg.mean(),
    line_dash='dash',
    line_color='red',
    annotation_text='Mean Rainfall',
    annotation_position='top right'
)
fig_monthly.update_traces(marker_color='skyblue', marker_line_color='black', marker_line_width=2)
fig_monthly.update_layout(template='plotly_white', height=500)
fig_monthly

In [19]:
# Seasonal rainfall distribution
seasonal_columns = ['Jan-Feb', 'Mar-May', 'Jun-Sep', 'Oct-Dec']
seasonal_avg = rainfall_data[seasonal_columns].mean()
seasonal_avg

Jan-Feb     43.189565
Mar-May    128.694783
Jun-Sep    890.260870
Oct-Dec    119.882609
dtype: float64

In [26]:
fig_seasonal = px.bar(
    x=seasonal_avg.index,
    y=seasonal_avg.values,
    labels={'x': 'Season', 'y': 'Rainfall (mm)'},
    title='Seasonal Rainfall Distribution in India (1901-2015)',
    text=seasonal_avg.values,
    color=seasonal_avg.values,
    color_continuous_scale=['gold', 'skyblue', 'green', 'orange']
)
fig_seasonal.add_hline(
    y=seasonal_avg.mean(),
    line_dash='dash',
    line_color='red',
    annotation_text = 'Mean Seasonal Rainfall',
    annotation_position = 'top left'
)

fig_seasonal.update_traces(marker_line_color='black', marker_line_width=1)
fig_seasonal.update_layout(
    template='plotly_white',
    height=500,
    coloraxis_colorbar=dict(title='mm')
)
fig_seasonal

Assessing the Impact of Climate Change in the Rainfall Trends in India

In [30]:
# Calculating rolling averages to assess climate change impact
rainfall_data['10-Year Rolling Avg'] = rainfall_data['ANNUAL'].rolling(window=10).mean()
rainfall_data['10-Year Rolling Avg']

0          NaN
1          NaN
2          NaN
3          NaN
4          NaN
        ...   
110    1121.61
111    1136.88
112    1141.05
113    1137.29
114    1123.36
Name: 10-Year Rolling Avg, Length: 115, dtype: float64

In [34]:
fig_climate_change = go.Figure()

fig_climate_change.add_trace(go.Scatter(
    x=rainfall_data['YEAR'],
    y=rainfall_data['ANNUAL'],
    mode='lines',
    name='Annual Rainfall',
    line=dict(color='blue', width=3),
    opacity=0.6
))

fig_climate_change.add_trace(go.Scatter(
    x=rainfall_data['YEAR'],
    y=rainfall_data['10-Year Rolling Avg'],
    mode='lines',
    name='10-Year Rolling Avg',
    line=dict(color='red', width=3)
))

fig_climate_change.update_layout(
    title='Impact of Climate Change on Rainfall Patterns (1901-2015)',
    xaxis_title = 'Year',
    yaxis_title = 'Rainfall (mm)',
    template='plotly_white',
    legend=dict(title='Legend'),
    height=500
)

fig_climate_change

Using statistical thresholds, to identify years with extreme or deficient rainfall(1.5 standard deviations below or above the mean).

In [35]:
from scipy.stats import pearsonr

# Identifyig drought and extreme rainfall years
mean_rainfall = rainfall_data['ANNUAL'].mean()