Dataset download link:  https://www.kaggle.com/datasets/rajanand/rainfall-in-india

In [2]:
import pandas as pd
import numpy as np

In [3]:
data = pd.read_csv("district wise rainfall normal.csv")
data.info()
data.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 641 entries, 0 to 640
Data columns (total 19 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   STATE_UT_NAME  641 non-null    object 
 1   DISTRICT       641 non-null    object 
 2   JAN            641 non-null    float64
 3   FEB            641 non-null    float64
 4   MAR            641 non-null    float64
 5   APR            641 non-null    float64
 6   MAY            641 non-null    float64
 7   JUN            641 non-null    float64
 8   JUL            641 non-null    float64
 9   AUG            641 non-null    float64
 10  SEP            641 non-null    float64
 11  OCT            641 non-null    float64
 12  NOV            641 non-null    float64
 13  DEC            641 non-null    float64
 14  ANNUAL         641 non-null    float64
 15  Jan-Feb        641 non-null    float64
 16  Mar-May        641 non-null    float64
 17  Jun-Sep        641 non-null    float64
 18  Oct-Dec   

(641, 19)

In [4]:
data.head()

Unnamed: 0,STATE_UT_NAME,DISTRICT,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
0,ANDAMAN And NICOBAR ISLANDS,NICOBAR,107.3,57.9,65.2,117.0,358.5,295.5,285.0,271.9,354.8,326.0,315.2,250.9,2805.2,165.2,540.7,1207.2,892.1
1,ANDAMAN And NICOBAR ISLANDS,SOUTH ANDAMAN,43.7,26.0,18.6,90.5,374.4,457.2,421.3,423.1,455.6,301.2,275.8,128.3,3015.7,69.7,483.5,1757.2,705.3
2,ANDAMAN And NICOBAR ISLANDS,N & M ANDAMAN,32.7,15.9,8.6,53.4,343.6,503.3,465.4,460.9,454.8,276.1,198.6,100.0,2913.3,48.6,405.6,1884.4,574.7
3,ARUNACHAL PRADESH,LOHIT,42.2,80.8,176.4,358.5,306.4,447.0,660.1,427.8,313.6,167.1,34.1,29.8,3043.8,123.0,841.3,1848.5,231.0
4,ARUNACHAL PRADESH,EAST SIANG,33.3,79.5,105.9,216.5,323.0,738.3,990.9,711.2,568.0,206.9,29.5,31.7,4034.7,112.8,645.4,3008.4,268.1


### Q1. State-Level Rainfall Extremes

For each State/UT, determine:

1. The district with the highest annual rainfall.

2. The district with the lowest annual rainfall.

3. The difference between highest and lowest rainfall within that state.


In [5]:
highest_rain_res = data.loc[data.groupby("STATE_UT_NAME")["ANNUAL"].idxmax()][
    ["STATE_UT_NAME", "DISTRICT", "ANNUAL"]
].sort_values("ANNUAL", ascending=False)
lowest_rain_res = data.loc[data.groupby("STATE_UT_NAME")["ANNUAL"].idxmin()][
    ["STATE_UT_NAME", "DISTRICT", "ANNUAL"]
].sort_values("ANNUAL", ascending=True)

highest_rain_res = highest_rain_res.rename(
    columns={
        "DISTRICT": "HIGHEST_RAINFALL_DISTRICT",
        "ANNUAL": "HIGHEST_ANNUAL_RAINFALL",
    }
)
lowest_rain_res = lowest_rain_res.rename(
    columns={"DISTRICT": "LOWEST_RAINFALL_DISTRICT", "ANNUAL": "LOWEST_ANNUAL_RAINFALL"}
)
merged_rainfall = pd.merge(
    highest_rain_res, lowest_rain_res, on="STATE_UT_NAME", how="inner"
)

merged_rainfall["Difference"] = (
    merged_rainfall["HIGHEST_ANNUAL_RAINFALL"]
    - merged_rainfall["LOWEST_ANNUAL_RAINFALL"]
)

merged_rainfall.head(3)

Unnamed: 0,STATE_UT_NAME,HIGHEST_RAINFALL_DISTRICT,HIGHEST_ANNUAL_RAINFALL,LOWEST_RAINFALL_DISTRICT,LOWEST_ANNUAL_RAINFALL,Difference
0,MANIPUR,TAMENGLONG,7229.3,THOUBAL,1286.3,5943.0
1,MEGHALAYA,JAINTIA HILLS,6379.9,RI-BHOI,2116.9,4263.0
2,ARUNACHAL PRADESH,UPPER SIANG,4402.1,SUBANSIRI F.D,1300.4,3101.7


### Q2. Seasonal Rainfall Comparison

For each State/UT, calculate:

1. Which district has the highest rainfall in the Monsoon season (Jun–Sep)?

2. Which district has the lowest rainfall in the Winter season (Jan–Feb)?


In [6]:
highest_rain_mansoon = data.loc[data.groupby("STATE_UT_NAME")["Jun-Sep"].idxmax()][
    ["STATE_UT_NAME", "DISTRICT", "Jun-Sep"]
].sort_values("Jun-Sep", ascending=False)
lowest_rain_winter = data.loc[data.groupby("STATE_UT_NAME")["Jan-Feb"].idxmin()][
    ["STATE_UT_NAME", "DISTRICT", "Jan-Feb"]
].sort_values("Jan-Feb", ascending=False)

highest_rain_mansoon = highest_rain_mansoon.rename(
    columns={"DISTRICT": "HIGHEST RF IN MONSOON"}
)
lowest_rain_winter = lowest_rain_winter.rename(
    columns={"DISTRICT": "LOWEST RF IN WINTER"}
)

merged_rainfall = pd.merge(
    highest_rain_mansoon, lowest_rain_winter, on="STATE_UT_NAME", how="inner"
)
merged_rainfall.head(2)

Unnamed: 0,STATE_UT_NAME,HIGHEST RF IN MONSOON,Jun-Sep,LOWEST RF IN WINTER,Jan-Feb
0,MANIPUR,TAMENGLONG,5228.0,CHANDEL,36.4
1,MEGHALAYA,JAINTIA HILLS,4667.5,SOUTH GARO HI,17.5


3. Compute the percentage contribution of Monsoon rainfall (Jun–Sep) to Annual rainfall for each district.


In [7]:
data["Monsoon_%"] = (data["Jun-Sep"] / data["ANNUAL"]) * 100
data[["DISTRICT", "Jun-Sep", "ANNUAL", "Monsoon_%"]].head(5)

Unnamed: 0,DISTRICT,Jun-Sep,ANNUAL,Monsoon_%
0,NICOBAR,1207.2,2805.2,43.034365
1,SOUTH ANDAMAN,1757.2,3015.7,58.268395
2,N & M ANDAMAN,1884.4,2913.3,64.682662
3,LOHIT,1848.5,3043.8,60.730009
4,EAST SIANG,3008.4,4034.7,74.563165


### Q3. Monthly Rainfall Trends

For each State/UT, analyze monthly rainfall patterns across districts:

1. Find the month (Jan–Dec) in which each state receives the highest average rainfall (averaged across its districts).

Example: Andaman & Nicobar Islands might peak in August.

2. Find the month with the lowest average rainfall for each state.


In [8]:
montly_cols = [
    "JAN",
    "FEB",
    "MAR",
    "APR",
    "MAY",
    "JUN",
    "JUL",
    "AUG",
    "SEP",
    "OCT",
    "NOV",
    "DEC",
]

state_rf = data.groupby("STATE_UT_NAME")[montly_cols].sum().reset_index()

state_rf["Peak Month"] = state_rf[montly_cols].idxmax(axis=1)
state_rf["Peak Avg Rainfall"] = state_rf[montly_cols].max(axis=1)

state_rf["Lowest Month"] = state_rf[montly_cols].idxmin(axis=1)
state_rf["Lowest Avg Rainfall"] = state_rf[montly_cols].min(axis=1)

state_rf[
    [
        "STATE_UT_NAME",
        "Peak Month",
        "Peak Avg Rainfall",
        "Lowest Month",
        "Lowest Avg Rainfall",
    ]
].head(3)

Unnamed: 0,STATE_UT_NAME,Peak Month,Peak Avg Rainfall,Lowest Month,Lowest Avg Rainfall
0,ANDAMAN And NICOBAR ISLANDS,SEP,1265.2,MAR,92.4
1,ANDHRA PRADESH,JUL,4263.4,JAN,145.4
2,ARUNACHAL PRADESH,JUL,8761.3,DEC,575.3


3. For each district, compute the coefficient of variation (CV) of rainfall across 12 months:

   This tells you which districts have highly variable rainfall vs. which have uniform rainfall throughout the year.


In [9]:
data["mean_rf"] = data[montly_cols].mean(axis=1)
data["Std Dev"] = np.sqrt(
    ((data[montly_cols].sub(data["mean_rf"], axis=0)) ** 2).mean(axis=1)
)
data["CV (%)"] = (data["Std Dev"] /data["mean_rf"])*100

data[["DISTRICT", "mean_rf", "Std Dev","CV (%)"]].head(3)

Unnamed: 0,DISTRICT,mean_rf,Std Dev,CV (%)
0,NICOBAR,233.766667,108.973042,46.616159
1,SOUTH ANDAMAN,251.308333,170.730096,67.936504
2,N & M ANDAMAN,242.775,189.271684,77.961769


### Q4: Identify the top 5 districts with the highest rainfall variability.

In [10]:
data.nlargest(5,"CV (%)").reset_index()[["DISTRICT","CV (%)"]]

Unnamed: 0,DISTRICT,CV (%)
0,SIROHI,155.070032
1,SAWAI MADHOPUR,154.103683
2,PATAN(MHSN),152.503338
3,KARAULI,152.119101
4,AJMER,152.078739


### Q5: Identify the top 5 districts with the highest and lowest annual rainfall

highest

In [12]:
data.nlargest(5,"ANNUAL").reset_index()[["STATE_UT_NAME","DISTRICT","ANNUAL"]]

Unnamed: 0,STATE_UT_NAME,DISTRICT,ANNUAL
0,MANIPUR,TAMENGLONG,7229.3
1,MEGHALAYA,JAINTIA HILLS,6379.9
2,MEGHALAYA,EAST KHASI HI,6166.1
3,ARUNACHAL PRADESH,UPPER SIANG,4402.1
4,KARNATAKA,UDUPI,4306.0


lowest

In [None]:
data.nsmallest(5, "ANNUAL").reset_index()[["STATE_UT_NAME", "DISTRICT", "ANNUAL"]]

Unnamed: 0,STATE_UT_NAME,DISTRICT,ANNUAL
0,JAMMU AND KASHMIR,LADAKH (LEH),94.6
1,RAJASTHAN,JAISALMER,181.2
2,JAMMU AND KASHMIR,KARGIL,223.3
3,RAJASTHAN,SRI GANGANAGA,252.9
4,RAJASTHAN,BARMER,268.6


### Q6. Find the district(s) that received rainfall greater than the state average in the ANNUAL column.

In [22]:
state_avg_rf = data.merge(
    data.groupby("STATE_UT_NAME")["ANNUAL"].mean().rename("State_Avg_Annual"),
    left_on="STATE_UT_NAME",
    right_index=True,
)

above_state_avg_rf_dist = state_avg_rf[
    state_avg_rf["ANNUAL"] > state_avg_rf["State_Avg_Annual"]
]

above_state_avg_rf_dist[
    ["STATE_UT_NAME", "DISTRICT", "ANNUAL", "State_Avg_Annual"]
].sort_values("ANNUAL", ascending=False).reset_index().head()

Unnamed: 0,index,STATE_UT_NAME,DISTRICT,ANNUAL,State_Avg_Annual
0,55,MANIPUR,TAMENGLONG,7229.3,2496.633333
1,47,MEGHALAYA,JAINTIA HILLS,6379.9,3682.842857
2,46,MEGHALAYA,EAST KHASI HI,6166.1,3682.842857
3,12,ARUNACHAL PRADESH,UPPER SIANG,4402.1,2927.375
4,598,KARNATAKA,UDUPI,4306.0,1194.616667


### Q7. Find the top 3 districts with the highest rainfall in each state.

In [None]:
data.groupby("STATE_UT_NAME").apply(
    lambda x: x.nlargest(3, "ANNUAL").reset_index(drop=True)
)[["DISTRICT", "ANNUAL"]].head(6)

  data.groupby("STATE_UT_NAME").apply(lambda x: x.nlargest(3,"ANNUAL").reset_index(drop=True))[["DISTRICT","ANNUAL"]].head(6)


Unnamed: 0_level_0,Unnamed: 1_level_0,DISTRICT,ANNUAL
STATE_UT_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ANDAMAN And NICOBAR ISLANDS,0,SOUTH ANDAMAN,3015.7
ANDAMAN And NICOBAR ISLANDS,1,N & M ANDAMAN,2913.3
ANDAMAN And NICOBAR ISLANDS,2,NICOBAR,2805.2
ANDHRA PRADESH,0,SRIKAKULAM,1165.2
ANDHRA PRADESH,1,WEST GODAVARI,1160.4
ANDHRA PRADESH,2,VIZIANAGARAM,1140.4


### Q8. Find the bottom 2 districts with the lowest rainfall in each state.

In [None]:
data.groupby("STATE_UT_NAME").apply(
    lambda x: x.nsmallest(2, "ANNUAL").reset_index(drop=True)
)[["DISTRICT", "ANNUAL"]].head(6)

  data.groupby("STATE_UT_NAME").apply(lambda x: x.nsmallest(2,"ANNUAL").reset_index(drop=True))[["DISTRICT","ANNUAL"]].head(6)


Unnamed: 0_level_0,Unnamed: 1_level_0,DISTRICT,ANNUAL
STATE_UT_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ANDAMAN And NICOBAR ISLANDS,0,NICOBAR,2805.2
ANDAMAN And NICOBAR ISLANDS,1,N & M ANDAMAN,2913.3
ANDHRA PRADESH,0,ANANTAPUR,572.7
ANDHRA PRADESH,1,KURNOOL,679.6
ARUNACHAL PRADESH,0,SUBANSIRI F.D,1300.4
ARUNACHAL PRADESH,1,LOW SUBANSIRI,1921.1


### Q9: Find the average annual rainfall for each STATE_UT_NAME

In [None]:
data.groupby("STATE_UT_NAME")["ANNUAL"].mean().sort_values(
    ascending=False
).reset_index().head(3)

Unnamed: 0,STATE_UT_NAME,ANNUAL
0,MEGHALAYA,3682.842857
1,GOA,3278.5
2,KERALA,2937.392857


### Q10 Find for each state/UT:
Minimum annual rainfall
Maximum annual rainfall
Average annual rainfall

and return the result in descending order of average rainfall.

In [None]:
state_rf = (
    data.groupby("STATE_UT_NAME")["ANNUAL"]
    .agg(["min", "max", "mean"])
    .sort_values("mean", ascending=False)
    .reset_index()
)
state_rf.columns = ["STATE_UT_NAME", "MIN RF", "MAX RF", "AVG RF"]

state_rf.head(6)

Unnamed: 0,STATE_UT_NAME,MIN RF,MAX RF,AVG RF
0,MEGHALAYA,2116.9,6379.9,3682.842857
1,GOA,3085.6,3471.4,3278.5
2,KERALA,1803.2,3621.6,2937.392857
3,ARUNACHAL PRADESH,1300.4,4402.1,2927.375
4,ANDAMAN And NICOBAR ISLANDS,2805.2,3015.7,2911.4
5,SIKKIM,2582.2,3094.5,2838.35


### Q10. Find the top 5 states/UTs with the highest average annual rainfall and lowest average annual rainfall (two separate results).

In [48]:
data.groupby("STATE_UT_NAME")["ANNUAL"].mean().nlargest(5).reset_index()

Unnamed: 0,STATE_UT_NAME,ANNUAL
0,MEGHALAYA,3682.842857
1,GOA,3278.5
2,KERALA,2937.392857
3,ARUNACHAL PRADESH,2927.375
4,ANDAMAN And NICOBAR ISLANDS,2911.4


In [49]:
data.groupby("STATE_UT_NAME")["ANNUAL"].mean().nsmallest(5).reset_index()

Unnamed: 0,STATE_UT_NAME,ANNUAL
0,RAJASTHAN,581.59697
1,HARYANA,614.557143
2,PUNJAB,648.545
3,DELHI,747.1
4,GUJARAT,924.342308
