In [5]:
import os

import numpy as np
import pandas as pd

# Location of the processed crop-production CSV. The original local path is
# kept as the default so existing runs behave the same; set the CROP_DATA_CSV
# environment variable to point at the file on another machine instead of
# editing this cell.
DATA_CSV = os.environ.get(
    "CROP_DATA_CSV",
    r'C:\Users\Lenovo\Desktop\AgriCast Predictive Analytics for Crop Production\Data\Processed\crop_production_processed_data.csv',
)

data = pd.read_csv(DATA_CSV)

In [6]:
# Preview the first five records of the loaded frame.
data.head(n=5)

Unnamed: 0,State_Name,District_Name,Crop_Year,Season,Crop,Area,Production
0,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Arecanut,1254.0,2000.0
1,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Other Kharif pulses,2.0,1.0
2,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Rice,102.0,321.0
3,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Banana,176.0,641.0
4,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Cashewnut,720.0,165.0


In [7]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape

(162157, 7)

In [8]:
# Number of missing entries in each column (isnull is an alias of isna).
data.isna().sum()

State_Name       0
District_Name    0
Crop_Year        0
Season           0
Crop             0
Area             0
Production       0
dtype: int64

## Univariate Analysis

- States: 

In [9]:
# Keep one row per distinct State_Name and report how many remain.
data['State_Name'].drop_duplicates().shape
# The result shows 33 distinct State_Name values in this dataset.

(33,)

In [10]:
# Number of records (rows) per state, most frequent first; computed once and
# reused for both the grand total and the top-10 subtotal.
state_counts = data['State_Name'].value_counts()

total = state_counts.sum()
top_10 = state_counts.head(10).sum()

# value_counts() tallies rows, not the Production column, so this percentage is
# the top-10 states' share of dataset records; the message states that
# explicitly instead of calling it a production contribution.
print(f"Top 10 states account for {np.round((top_10/total)*100, 2)}% of all records")

Top 10 states production contribution is 71.57%


- District Name:

In [11]:
# The frequency table has one entry per distinct district name, so its length
# is the number of districts present.
data.loc[:, 'District_Name'].value_counts().shape

(646,)

- Crop Year:

In [12]:
crop_years = data['Crop_Year']
crop_years.max() - crop_years.min()

# The latest crop year is 18 years after the earliest one.

np.int64(18)

- Season

In [13]:
# Record count per season, largest first (the default ordering, spelled out).
data['Season'].value_counts(sort=True, ascending=False)

Season
Kharif         59972
Rabi           43818
Whole Year     41036
Summer         11594
Winter          3076
Autumn          2661
Name: count, dtype: int64

- Crops

In [14]:
# Length of the per-crop frequency table, i.e. the number of distinct crops.
data.loc[:, 'Crop'].value_counts().shape

(123,)

- Area

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Three views of the Area column on a 2x2 grid (the fourth cell stays empty):
# histogram top-left, box plot top-right, violin plot bottom-left.
# add_trace and update_layout both return the figure, so the whole build can be
# written as a single chain.
fig = (
    make_subplots(
        rows=2,
        cols=2,
        subplot_titles=("Histogram of area: ", "Box Plot of Area", "Violin Plot of Area"),
    )
    .add_trace(go.Histogram(x=data['Area']), row=1, col=1)
    .add_trace(go.Box(x=data['Area']), row=1, col=2)
    .add_trace(go.Violin(x=data['Area']), row=2, col=1)
    .update_layout(width=800, height=500)
)

fig.show()

- Production

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Three views of the Production column on a 2x2 grid (the fourth cell stays
# empty). The first panel title previously read "Histogram of area: ", carried
# over from the Area cell, although every trace here plots Production.
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=("Histogram of Production", "Box Plot of Production", "Violin Plot of Production"),
)

fig.add_trace(go.Histogram(x=data['Production']), row=1, col=1)
fig.add_trace(go.Box(x=data['Production']), row=1, col=2)
fig.add_trace(go.Violin(x=data['Production']), row=2, col=1)

fig.update_layout(
    width=800,
    height=500
)

fig.show()

## Bivariate Analysis

In [17]:
import plotly.express as px

# Sum of the Area column per state, largest first, limited to the top ten.
# The aggregation is computed once and feeds both the labels and the heights.
area_by_state = (
    data.groupby(by="State_Name")["Area"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
values = area_by_state.values
index = area_by_state.index

fig = px.bar(x=index, y=values, title="Top 10 States Covering the Largest Area:")

fig.update_layout(
    xaxis_tickangle=-90,
    xaxis_tickmode='linear',
    width=1000,
    height=600,
    # Plain arrays are passed to px.bar, so without explicit titles the axes
    # are labelled just "x" and "y".
    xaxis_title="State",
    yaxis_title="Total Area",
)

fig.show()

In [18]:
# Sum of the Production column per state, largest first, limited to the top
# ten. The aggregation is computed once and feeds both labels and heights.
production_by_state = (
    data.groupby(by="State_Name")["Production"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
values = production_by_state.values
index = production_by_state.index

fig = px.bar(x=index, y=values, title="Top 10 most productive states")

fig.update_layout(
    xaxis_tickangle=-90,
    xaxis_tickmode='linear',
    width=1000,
    height=600,
    # Plain arrays are passed to px.bar, so without explicit titles the axes
    # are labelled just "x" and "y".
    xaxis_title="State",
    yaxis_title="Total Production",
)

fig.show()

In [19]:
# Sum of the Production column per season (groupby orders the seasons by
# name). Aggregate once; heights and labels come from the same Series.
production_by_season = data.groupby(by="Season")['Production'].sum()
production = production_by_season.values
seasons = production_by_season.index

fig = px.bar(x=seasons, y=production, color=seasons, title="In which season the maximum production takes place:")

fig.update_layout(
    xaxis_tickangle=-90,
    xaxis_tickmode='linear',
    width=800,
    height=400,
    # Plain arrays are passed to px.bar, so the axes and the legend would
    # otherwise be titled "x", "y" and "color".
    xaxis_title="Season",
    yaxis_title="Total Production",
    legend_title_text="Season",
)

fig.show()

In [20]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Sum of the Area column per crop, largest first. Aggregate once so the crop
# labels and the area values are guaranteed to come from the same ordering.
area_by_crop = data.groupby(by="Crop")["Area"].sum().sort_values(ascending=False)
crops = area_by_crop.index
areas = area_by_crop.values

fig = make_subplots(rows=1, cols=2, subplot_titles=("Top 10 Crops Occupying Maximum Area: ", "Top 10 Crops Occupying Minimum Area: "))

# Left panel: the ten largest totals.
fig.add_trace(go.Bar(x=crops[:10], y=areas[:10]), row=1, col=1)
# Right panel: the ten smallest totals, smallest first. The slice must be
# [-10:]; the earlier [-10:-1] stopped one short, so it plotted only nine bars
# and left out the crop with the very smallest area.
fig.add_trace(go.Bar(x=crops[-10:][::-1], y=areas[-10:][::-1]), row=1, col=2)

fig.update_layout(
    width=1200,
    height=600
)

fig.show()

In [21]:
# Sum of the Production column per crop, largest first. Aggregate once; the
# labels and the bar heights are two views of the same sorted Series.
production_by_crop = data.groupby(by="Crop")["Production"].sum().sort_values(ascending=False)
crops = production_by_crop.index
production = production_by_crop.values

fig = px.bar(x=crops[:10], y=production[:10], color=crops[:10], title="Top 10 Highest producing crops:")

fig.update_layout(
    xaxis_tickangle=-90,
    xaxis_tickmode='linear',
    width=800,
    height=400,
    # Plain arrays are passed to px.bar, so the axes and the legend would
    # otherwise be titled "x", "y" and "color".
    xaxis_title="Crop",
    yaxis_title="Total Production",
    legend_title_text="Crop",
)

fig.show()

In [22]:
# Sum of the Production column for each crop year, in ascending year order
# (groupby sorts by the key).
year = data.groupby(by="Crop_Year")["Production"].sum()

fig = px.bar(x=year.index, y=year.values, title="Production By Years: ")

fig.update_layout(
    width=800,
    height=500,
    # Plain arrays are passed to px.bar, so without explicit titles the axes
    # are labelled just "x" and "y".
    xaxis_title="Crop Year",
    yaxis_title="Total Production",
)

fig.show()

In [23]:
# Sum of the Production column per district, largest first. Aggregate once;
# the labels and the bar heights are two views of the same sorted Series.
production_by_district = (
    data.groupby(by="District_Name")["Production"]
    .sum()
    .sort_values(ascending=False)
)
districts = production_by_district.index
district_production = production_by_district.values

# Title wording fixed: it previously read "most productions districts".
fig = px.bar(x=districts[:10], y=district_production[:10], title="Top 10 most productive districts:")

fig.update_layout(
    xaxis_tickangle=-90,
    xaxis_tickmode='linear',
    width=600,
    height=500,
    xaxis_title = "Districts",
    yaxis_title = "Total Production By Districts"
)

fig.show()

In [24]:
# Show the first five rows of the frame again.
data.iloc[:5]

Unnamed: 0,State_Name,District_Name,Crop_Year,Season,Crop,Area,Production
0,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Arecanut,1254.0,2000.0
1,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Other Kharif pulses,2.0,1.0
2,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,Rice,102.0,321.0
3,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Banana,176.0,641.0
4,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,Cashewnut,720.0,165.0
