In [37]:
import pandas as pd
import numpy as np

In [38]:
# Load the raw loss table. Forward slashes are used because they work on every OS,
# whereas a backslash inside a normal string literal starts an escape sequence
# ('\d' is an invalid one and triggers a SyntaxWarning).
df = pd.read_csv('../data/loss_details.csv')

  df = pd.read_csv('..\data/loss_details.csv')


### 1. Initial Data Exploration

In [39]:
# Quick structural overview of the freshly loaded frame.
# Each value starts directly after the newline: a space between "\n" and a multi-line
# repr would indent only its first line and push the header out of line with the rows.
print(f"ROWS, COLUMNS : \n{df.shape} \n\n")
print(f"TOP ROWS : \n{df.head()} \n\n")
print(f"COLUMN NAMES : \n{df.columns} \n\n")
print("INFO :")
df.info()   # don't wrap in print(): info() writes its report itself and returns None
print(f"\n\n DESCRIPTION : \n{df.describe()} \n\n")



ROWS, COLUMNS : 
 (38, 14) 


TOP ROWS : 
   Sl. No.                     State/UT  2018-19  2019-20  % change (YoY)  \
0       1  Andaman and Nicobar Islands     0.11     0.07           -37.0   
1       2               Andhra Pradesh     0.15     1.30           797.0   
2       3            Arunachal Pradesh     0.01     0.05           566.0   
3       4                        Assam     0.81     2.39           195.0   
4       5                        Bihar     0.34     0.63            85.0   

   2020-21  % change (YoY).1  2021-22  % change (YoY).2  2022-23  \
0     0.00             -98.0     0.00            -100.0     0.00   
1     0.72             -45.0     0.29             -59.0     3.30   
2     0.38             720.0     0.00             -99.0     0.00   
3     1.68             -30.0     1.04             -38.0     0.25   
4     1.09              73.0     2.08              90.0     1.20   

   % change (YoY).3  2023-24  % change (YoY).4  2024-25 (Till Sept'24)  
0               Na

### What I Learned from the Data

##### 1. Losses are mostly in a few states
    • Stat: 2018-19 → Mean = 5.59, Median = 0.45; 2021-22 → Mean = 6.41, Median = 0.49.
    • Why: Most states lose very little, but a few states with huge losses (Max = 92–115) push up the average.
##### 2. Losses vary a lot between states
    • Stat: Std Dev 2018-19 = 16.58, 2021-22 = 19.50.
    • Why: Some states lose almost nothing, others lose huge amounts — high variation shows risk is uneven.
##### 3. Losses jump sharply year-to-year
    • Stat: YoY % change Max = 720–1022%, Std Dev = 150–220%.
    • Why: Losses aren’t gradual — some states go from tiny losses to massive losses in one year.
##### 4. Average (mean) is misleading
    • Stat: Median 2022-23 = 0.43 vs Mean = 4.73.
    • Why: The “average state” loses little; only a few states have big problems.
##### 5. Early data for 2024-25 hints at hotspots
    • Stat: Median = 0.21, Max = 101.53.
    • Why: Most states are fine so far, but some already have huge losses — these are emerging hotspots.


### FIND OUT HOTSPOTS !!!

In [40]:
# Tidy the raw frame: remove the serial-number column and give every column a
# code-friendly name (Loss_<year> for amounts, %_Change_Loss_<year> for YoY changes).
#
# errors='ignore' keeps this cell re-runnable: on a second run 'Sl. No.' is already
# gone and a plain drop() would raise KeyError. rename() skips keys that are not
# present by default, so it needs no extra guard.
df = (
    df
    .drop(columns=['Sl. No.'], errors='ignore')
    .rename(columns={
        'State/UT': 'State',
        '2018-19': 'Loss_2018_19',
        '2019-20': 'Loss_2019_20',
        '% change (YoY)': '%_Change_Loss_2019_20',
        '2020-21': 'Loss_2020_21',
        '% change (YoY).1': '%_Change_Loss_2020_21',
        '2021-22': 'Loss_2021_22',
        '% change (YoY).2': '%_Change_Loss_2021_22',
        '2022-23': 'Loss_2022_23',
        '% change (YoY).3': '%_Change_Loss_2022_23',
        '2023-24': 'Loss_2023_24',
        '% change (YoY).4': '%_Change_Loss_2023_24',
        "2024-25 (Till Sept'24)": 'Loss_2024_25'
    })
)

print(df.head())

                         State  Loss_2018_19  Loss_2019_20  \
0  Andaman and Nicobar Islands          0.11          0.07   
1               Andhra Pradesh          0.15          1.30   
2            Arunachal Pradesh          0.01          0.05   
3                        Assam          0.81          2.39   
4                        Bihar          0.34          0.63   

   %_Change_Loss_2019_20  Loss_2020_21  %_Change_Loss_2020_21  Loss_2021_22  \
0                  -37.0          0.00                  -98.0          0.00   
1                  797.0          0.72                  -45.0          0.29   
2                  566.0          0.38                  720.0          0.00   
3                  195.0          1.68                  -30.0          1.04   
4                   85.0          1.09                   73.0          2.08   

   %_Change_Loss_2021_22  Loss_2022_23  %_Change_Loss_2022_23  Loss_2023_24  \
0                 -100.0          0.00                    NaN          0.

In [42]:
# Missing-value count for each column (sum of the boolean NA mask down the rows).
print("NA BY COLUMNS : ")
df.isna().sum(axis="index")

NA BY COLUMNS : 


State                    0
Loss_2018_19             5
Loss_2019_20             2
%_Change_Loss_2019_20    4
Loss_2020_21             3
%_Change_Loss_2020_21    3
Loss_2021_22             2
%_Change_Loss_2021_22    3
Loss_2022_23             2
%_Change_Loss_2022_23    5
Loss_2023_24             2
%_Change_Loss_2023_24    6
Loss_2024_25             1
dtype: int64

In [43]:

# Missing-value count for each row (sum of the boolean NA mask across the columns).
print("NA BY ROWS : ")
df.isna().sum(axis="columns")

NA BY ROWS : 


0      2
1      0
2      0
3      0
4      0
5      0
6      0
7      1
8      0
9      0
10     0
11     0
12     0
13     0
14     0
15     0
16     4
17    11
18     0
19     0
20     1
21     0
22     2
23     2
24     4
25     8
26     0
27     0
28     0
29     0
30     3
31     0
32     0
33     0
34     0
35     0
36     0
37     0
dtype: int64

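- Drop rows with more than 5 NAs.
- Fill the remaining NAs with 0: since no value is given, we assume that no losses were reported for that year.
- Both the loss columns and the %_Change columns contain NAs (see the per-column counts above). The %_Change values can be recomputed from the loss columns, so recalculate them before replacing NAs with 0.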

In [44]:
# Loss columns in chronological order; each one is compared with its predecessor.
years = ['Loss_2018_19', 'Loss_2019_20', 'Loss_2020_21',
         'Loss_2021_22', 'Loss_2022_23', 'Loss_2023_24', 'Loss_2024_25']


def yoy_pct_change(prev, curr):
    """Return the element-wise year-over-year percentage change from `prev` to `curr`.

    Parameters
    ----------
    prev, curr : pandas.Series
        Numeric values for the previous and the current year, aligned row by row.

    Returns
    -------
    numpy.ndarray
        One float per row, chosen by the first rule that matches:
          * either value is missing (NaN) -> NaN (the change is unknown)
          * prev == 0 and curr == 0       -> 0   (nothing changed)
          * prev == 0 and curr != 0       -> inf (change from zero is unbounded)
          * otherwise                     -> (curr - prev) / prev * 100
    """
    missing = (prev.isna() | curr.isna()).to_numpy()
    prev_is_zero = (prev == 0).to_numpy()
    curr_is_zero = (curr == 0).to_numpy()
    # Computed for every row; rows with prev == 0 are replaced by the rules above.
    plain_change = (((curr - prev) / prev) * 100).to_numpy()

    # The missing-value rule must come first: NaN == 0 is False, so without it a row
    # with prev == 0 and a missing curr would be reported as inf instead of NaN.
    return np.select(
        [missing, prev_is_zero & curr_is_zero, prev_is_zero],
        [np.nan, 0.0, np.inf],
        default=plain_change,
    )


# Writes '%_Change_<loss column>' for every year after the first. For 2019_20..2023_24
# these names match the existing '%_Change_Loss_*' columns, so those values are
# overwritten with the recomputed ones; '%_Change_Loss_2024_25' is added as a new column.
for prev_col, curr_col in zip(years, years[1:]):
    df[f'%_Change_{curr_col}'] = yoy_pct_change(df[prev_col], df[curr_col])




In [35]:
# Preview the first rows of df.
# NOTE(review): this cell's execution count (35) is lower than that of the cells before
# it, and its saved output lists columns (e.g. '%_Change_2019_20',
# '%_Change_Loss_Loss_2019_20') that no visible cell creates — presumably stale kernel
# state; confirm by re-running with Restart Kernel -> Run All.
df.head()

Unnamed: 0,State,Loss_2018_19,Loss_2019_20,%_Change_2019_20,Loss_2020_21,%_Change_2020_21,Loss_2021_22,%_Change_2021_22,Loss_2022_23,%_Change_2022_23,...,%_Change_Loss_2021_22,%_Change_Loss_2022_23,%_Change_Loss_2023_24,%_Change_Loss_2024_25,%_Change_Loss_Loss_2019_20,%_Change_Loss_Loss_2020_21,%_Change_Loss_Loss_2021_22,%_Change_Loss_Loss_2022_23,%_Change_Loss_Loss_2023_24,%_Change_Loss_Loss_2024_25
0,Andaman and Nicobar Islands,0.11,0.07,-37.0,0.0,-98.0,0.0,-100.0,0.0,,...,0.0,0.0,0.0,0.0,-36.363636,-100.0,0.0,0.0,0.0,0.0
1,Andhra Pradesh,0.15,1.3,797.0,0.72,-45.0,0.29,-59.0,3.3,1022.0,...,-59.722222,1037.931034,-46.666667,-56.25,766.666667,-44.615385,-59.722222,1037.931034,-46.666667,-56.25
2,Arunachal Pradesh,0.01,0.05,566.0,0.38,720.0,0.0,-99.0,0.0,7.0,...,-100.0,0.0,0.0,inf,400.0,660.0,-100.0,0.0,0.0,inf
3,Assam,0.81,2.39,195.0,1.68,-30.0,1.04,-38.0,0.25,-76.0,...,-38.095238,-75.961538,196.0,-91.891892,195.061728,-29.707113,-38.095238,-75.961538,196.0,-91.891892
4,Bihar,0.34,0.63,85.0,1.09,73.0,2.08,90.0,1.2,-42.0,...,90.825688,-42.307692,-67.5,7.692308,85.294118,73.015873,90.825688,-42.307692,-67.5,7.692308
