In [145]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np  # conventional alias; binding numpy to `sns` would shadow seaborn

# Cleaning Yield dataset

In [146]:
df = pd.read_csv("./Data/yield_data.csv")

In [147]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Crop                  896 non-null    object 
 1   District              896 non-null    object 
 2   Year                  896 non-null    object 
 3   Season                896 non-null    object 
 4   Area(Hectare)         896 non-null    int64  
 5   Production(Tonne)     894 non-null    float64
 6   Yield(Tonne/Hectare)  894 non-null    float64
dtypes: float64(2), int64(1), object(4)
memory usage: 49.1+ KB


In [148]:
## Summarise the distinct values (and their count) of each categorical column before cleaning.
## Built with join() instead of one long f-string: reusing double quotes inside an
## f-string expression is only valid on Python 3.12+.
column_labels = [("Crop", "Crops: "), ("District", "Districts: "), ("Year", ""), ("Season", "Seasons: ")]
print("\n\n\n".join(
    f"{label}{df[column].unique()}-->Total:{df[column].nunique()}"
    for column, label in column_labels
))

Crops: ['Bajra' 'Cotton' 'Groundnut' 'Maize' 'Ragi' 'Rice' 'Turmeric'
 'Castor seed' 'Gram' 'Moong(Green Gram)' 'Safflower' 'Sunflower']-->Total:12


Districts: ['Anantapur' 'Chittoor' 'East godavari' 'Guntur' 'Kadapa' 'Kurnool'
 'Prakasam' 'Spsr nellore' 'Srikakulam' 'Visakhapatanam' 'Vizianagaram'
 'Krishna' 'West godavari' ' Chittoor' ' Guntur' ' Krishna'
 ' Spsr nellore' ' Visakhapatanam' ' Vizianagaram' ' West godavari'
 ' Kurnool' ' Prakasam' ' Srikakulam']-->Total:23


['2014 - 2015' '2015 - 2016' '2016 - 2017' '2017 - 2018' '2018 - 2019'
 '2019 - 2020' '2020 - 2021']-->Total:7


Seasons: ['Kharif' 'Rabi']-->Total:2


### Removing the extra space in the districts column values

In [149]:
## Considering the first year in the Year column ("2014 - 2015" -> 2014).
## Casting to str first keeps this cell safe to re-run after Year has already become an int.
df["Year"] = df["Year"].astype(str).str.split("-").str[0].str.strip().astype(int)


## Normalising the remaining string-typed columns in a single pass:
##   1. strip leading and trailing spaces
##   2. replace the in-between spaces with "_"
##   3. lowercase the values
text_columns = df.select_dtypes(include="object").columns
df[text_columns] = df[text_columns].apply(
    lambda column: column.str.strip().str.replace(" ", "_", regex=False).str.lower()
)

In [150]:
## Summarise the distinct values (and their count) of each categorical column after cleaning.
## Built with join() instead of one long f-string: reusing double quotes inside an
## f-string expression is only valid on Python 3.12+.
column_labels = [("Crop", "Crops: "), ("District", "Districts: "), ("Year", ""), ("Season", "Seasons: ")]
print("\n\n\n".join(
    f"{label}{df[column].unique()}-->Total:{df[column].nunique()}"
    for column, label in column_labels
))

Crops: ['bajra' 'cotton' 'groundnut' 'maize' 'ragi' 'rice' 'turmeric'
 'castor_seed' 'gram' 'moong(green_gram)' 'safflower' 'sunflower']-->Total:12


Districts: ['anantapur' 'chittoor' 'east_godavari' 'guntur' 'kadapa' 'kurnool'
 'prakasam' 'spsr_nellore' 'srikakulam' 'visakhapatanam' 'vizianagaram'
 'krishna' 'west_godavari']-->Total:13


[2014 2015 2016 2017 2018 2019 2020]-->Total:7


Seasons: ['kharif' 'rabi']-->Total:2


In [151]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Crop                  896 non-null    object 
 1   District              896 non-null    object 
 2   Year                  896 non-null    int64  
 3   Season                896 non-null    object 
 4   Area(Hectare)         896 non-null    int64  
 5   Production(Tonne)     894 non-null    float64
 6   Yield(Tonne/Hectare)  894 non-null    float64
dtypes: float64(2), int64(2), object(3)
memory usage: 49.1+ KB


#### Doing mean imputation for Production and Yield columns

In [152]:

## Fill the missing Production / Yield entries with each column's overall mean
impute_columns = ["Yield(Tonne/Hectare)", "Production(Tonne)"]
column_means = df[impute_columns].mean()
df[impute_columns] = df[impute_columns].fillna(column_means)

In [153]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Crop                  896 non-null    object 
 1   District              896 non-null    object 
 2   Year                  896 non-null    int64  
 3   Season                896 non-null    object 
 4   Area(Hectare)         896 non-null    int64  
 5   Production(Tonne)     896 non-null    float64
 6   Yield(Tonne/Hectare)  896 non-null    float64
dtypes: float64(2), int64(2), object(3)
memory usage: 49.1+ KB


In [None]:
## Persist the cleaned yield table.
## The file name must match the one the later cells load with
## pd.read_csv("CleanedData/clean_yield_data.csv"); writing under a different name
## leaves those cells reading a stale file or failing on a fresh run.
df.to_csv("CleanedData/clean_yield_data.csv", index=False)

# Cleaning ICRISAT-District dataset

In [238]:
df = pd.read_csv("Data/ICRISAT-District_Level_Data_NCA.csv") 

In [239]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 6 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Dist_Code                   77 non-null     int64  
 1   Year                        77 non-null     int64  
 2   State_Code                  77 non-null     int64  
 3   State_Name                  77 non-null     object 
 4   Dist_Name                   77 non-null     object 
 5   Fertilizer(KG_per_hectare)  77 non-null     float64
dtypes: float64(1), int64(3), object(2)
memory usage: 3.7+ KB


#### It does not have any null values, so I am just renaming the columns to match the yield dataset

In [240]:
## Use the same column name for the district as the yield dataset
df = df.rename(columns={"Dist_Name": "District"})

In [241]:
df.head()

Unnamed: 0,Dist_Code,Year,State_Code,State_Name,District,Fertilizer(KG_per_hectare)
0,44,2014,1,Andhra Pradesh,Srikakulam,183.22
1,44,2015,1,Andhra Pradesh,Srikakulam,167.77
2,44,2016,1,Andhra Pradesh,Srikakulam,168.19
3,44,2017,1,Andhra Pradesh,Srikakulam,174.21
4,44,2018,1,Andhra Pradesh,Srikakulam,187.29


In [242]:
df["District"].unique()

array(['Srikakulam', 'Visakhapatnam', 'East Godavari', 'West Godavari',
       'Krishna', 'Guntur', 'S.P.S. Nellore', 'Kurnool', 'Ananthapur',
       'Kadapa YSR', 'Chittoor'], dtype=object)

In [243]:
df["District"].nunique()

11

In [244]:
df_yield = pd.read_csv("CleanedData/clean_yield_data.csv")

In [245]:
df_yield["District"].unique()

array(['anantapur', 'chittoor', 'east_godavari', 'guntur', 'kadapa',
       'kurnool', 'prakasam', 'spsr_nellore', 'srikakulam',
       'visakhapatanam', 'vizianagaram', 'krishna', 'west_godavari'],
      dtype=object)

In [246]:
df["District"].unique()

array(['Srikakulam', 'Visakhapatnam', 'East Godavari', 'West Godavari',
       'Krishna', 'Guntur', 'S.P.S. Nellore', 'Kurnool', 'Ananthapur',
       'Kadapa YSR', 'Chittoor'], dtype=object)

In [247]:
## Renaming the values in district column so they match the spellings used in the yield data
icrisat_district_names = {
    "Srikakulam": "srikakulam",
    "Visakhapatnam": "visakhapatanam",
    "East Godavari": "east_godavari",
    "West Godavari": "west_godavari",
    "Krishna": "krishna",
    "Guntur": "guntur",
    "S.P.S. Nellore": "spsr_nellore",
    "Kurnool": "kurnool",
    "Ananthapur": "anantapur",
    "Kadapa YSR": "kadapa",
    "Chittoor": "chittoor",
}
df["District"] = df["District"].replace(icrisat_district_names)

In [248]:
df["District"].unique()

array(['srikakulam', 'visakhapatanam', 'east_godavari', 'west_godavari',
       'krishna', 'guntur', 'spsr_nellore', 'kurnool', 'anantapur',
       'kadapa', 'chittoor'], dtype=object)

In [249]:
df.head()

Unnamed: 0,Dist_Code,Year,State_Code,State_Name,District,Fertilizer(KG_per_hectare)
0,44,2014,1,Andhra Pradesh,srikakulam,183.22
1,44,2015,1,Andhra Pradesh,srikakulam,167.77
2,44,2016,1,Andhra Pradesh,srikakulam,168.19
3,44,2017,1,Andhra Pradesh,srikakulam,174.21
4,44,2018,1,Andhra Pradesh,srikakulam,187.29


In [250]:
df.to_csv("CleanedData/cleaned_ICRISAT_Fertilizers_dataset.csv", index=False)

# Cleaning Annual temperature dataset

In [329]:
df = pd.read_csv("Data/annual_temp_complete_new.csv") 

In [330]:
df.head()

Unnamed: 0,District,Year,Annual_Temp
0,Srikakulam,2014,29.5
1,Srikakulam,2015,30.75
2,Srikakulam,2016,30.928571
3,Srikakulam,2017,30.552632
4,Srikakulam,2018,30.776596


In [331]:
df["District"].unique()

array(['Srikakulam', 'Vizianagaram', 'Visakhapatanam', 'East godavari',
       'West godavari', 'Krishna', 'Guntur', 'Prakasam', 'SPSR Nellore',
       'Chittoor', 'Kadapa', 'Kurnool', 'Anantapur'], dtype=object)

In [None]:
df.columns = df.columns.str.strip()  ## Removing the leading and trailing spaces from the column names

## Map the district spellings onto the ones used in the yield data.
## Both capitalisations of the Godavari districts are listed; each key appears only once.
temperature_district_names = {
    "Srikakulam": "srikakulam",
    "Vizianagaram": "vizianagaram",
    "Visakhapatanam": "visakhapatanam",
    "East Godavari": "east_godavari",
    "East godavari": "east_godavari",
    "West Godavari": "west_godavari",
    "West godavari": "west_godavari",
    "Krishna": "krishna",
    "Guntur": "guntur",
    "Prakasam": "prakasam",
    "SPSR Nellore": "spsr_nellore",
    "Kurnool": "kurnool",
    "Anantapur": "anantapur",
    "Kadapa": "kadapa",
    "Chittoor": "chittoor",
}
df["District"] = df["District"].replace(temperature_district_names)



In [None]:
## Rounding off the annual temperature values to two decimal places
df["Annual_Temp"] = df["Annual_Temp"].round(decimals=2)

In [335]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91 entries, 0 to 90
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   District     91 non-null     object 
 1   Year         91 non-null     int64  
 2   Annual_Temp  91 non-null     float64
dtypes: float64(1), int64(1), object(1)
memory usage: 2.3+ KB


In [336]:
df["District"].unique()

array(['srikakulam', 'vizianagaram', 'visakhapatanam', 'east_godavari',
       'west_godavari', 'krishna', 'guntur', 'prakasam', 'spsr_nellore',
       'chittoor', 'kadapa', 'kurnool', 'anantapur'], dtype=object)

In [337]:
df.to_csv("CleanedData/cleaned_AnnualTemparature.csv", index=False)

# Cleaning the rainfall dataset

In [327]:
df = pd.read_csv("Data/annual_rainfall.csv")

In [328]:
df.head()


Unnamed: 0,Dist Code,Year,State Code,State Name,Dist Name,ANNUAL_RAINFALL(Millimeters)
0,44,2014,1,Andhra Pradesh,Srikakulam,803.0
1,44,2015,1,Andhra Pradesh,Srikakulam,1621.7
2,44,2016,1,Andhra Pradesh,Srikakulam,934.6
3,44,2017,1,Andhra Pradesh,Srikakulam,1289.2
4,44,2018,1,Andhra Pradesh,Srikakulam,1421.9


In [294]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Dist Code                     77 non-null     int64  
 1   Year                          77 non-null     int64  
 2   State Code                    77 non-null     int64  
 3   State Name                    77 non-null     object 
 4   Dist Name                     77 non-null     object 
 5   ANNUAL_RAINFALL(Millimeters)  77 non-null     float64
dtypes: float64(1), int64(3), object(2)
memory usage: 3.7+ KB


In [296]:
## Use the same column name for the district as the yield dataset
df = df.rename(columns={"Dist Name": "District"})

In [299]:
df["District"].unique()

array(['Srikakulam', 'Visakhapatnam', 'East Godavari', 'West Godavari',
       'Krishna', 'Guntur', 'S.P.S. Nellore', 'Kurnool', 'Ananthapur',
       'Kadapa YSR', 'Chittoor'], dtype=object)

In [304]:

## Map the rainfall district spellings onto the ones used in the yield data
rainfall_district_names = {
    "Srikakulam": "srikakulam",
    "Visakhapatnam": "visakhapatanam",
    "East Godavari": "east_godavari",
    "East godavari": "east_godavari",
    "West Godavari": "west_godavari",
    "Krishna": "krishna",
    "Guntur": "guntur",
    "S.P.S. Nellore": "spsr_nellore",
    "Kurnool": "kurnool",
    "Ananthapur": "anantapur",
    "Kadapa YSR": "kadapa",
    "Chittoor": "chittoor",
}
df["District"] = df["District"].replace(rainfall_district_names)

In [305]:
df["District"].unique()         

array(['srikakulam', 'visakhapatanam', 'east_godavari', 'west_godavari',
       'krishna', 'guntur', 'spsr_nellore', 'kurnool', 'anantapur',
       'kadapa', 'chittoor'], dtype=object)

In [308]:
df.to_csv("CleanedData/cleaned_annual_rainfall.csv", index=False)

# Combining the datasets

In [338]:
## Load the four cleaned tables: yield (the backbone), temperature, fertilizer and rainfall
cleaned_paths = (
    "CleanedData/clean_yield_data.csv",
    "CleanedData/cleaned_AnnualTemparature.csv",
    "CleanedData/cleaned_ICRISAT_Fertilizers_dataset.csv",
    "CleanedData/cleaned_annual_rainfall.csv",
)
df_backbone, df_temp, df_fertilizer, df_rainfall = (
    pd.read_csv(path) for path in cleaned_paths
)


In [339]:
df_backbone.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Crop                  896 non-null    object 
 1   District              896 non-null    object 
 2   Year                  896 non-null    int64  
 3   Season                896 non-null    object 
 4   Area(Hectare)         896 non-null    int64  
 5   Production(Tonne)     896 non-null    float64
 6   Yield(Tonne/Hectare)  896 non-null    float64
dtypes: float64(2), int64(2), object(3)
memory usage: 49.1+ KB


In [340]:
df_backbone.head()

Unnamed: 0,Crop,District,Year,Season,Area(Hectare),Production(Tonne),Yield(Tonne/Hectare)
0,bajra,anantapur,2014,kharif,1752,1028.0,0.59
1,bajra,anantapur,2015,kharif,3226,3326.0,1.03
2,bajra,anantapur,2016,kharif,2467,1258.0,0.51
3,bajra,anantapur,2017,kharif,5261,7034.0,1.34
4,bajra,anantapur,2018,kharif,2979,3447.0,1.16


In [341]:
df_temp.head()

Unnamed: 0,District,Year,Annual_Temp
0,srikakulam,2014,29.5
1,srikakulam,2015,30.75
2,srikakulam,2016,30.93
3,srikakulam,2017,30.55
4,srikakulam,2018,30.78


In [342]:
set(df_backbone["District"].values) - set(df_temp["District"].values)

set()

In [343]:
set(df_temp["District"].values) - set(df_backbone["District"].values) 

set()

In [344]:
set(df_backbone["Year"].values) - set(df_temp["Year"])

set()

In [345]:
set(df_temp["Year"]) - set(df_backbone["Year"].values)

set()

In [346]:
## Attach the annual temperature to every yield row, matching on (District, Year).
## validate= makes pandas raise if df_temp holds more than one row per (District, Year),
## which would silently duplicate yield rows.
df_backbone_one = df_backbone.merge(
    df_temp,
    on=["District", "Year"],
    how="outer",
    validate="many_to_one",
)
## An outer join also keeps keys that exist only in df_temp; such rows would have no crop data.
assert len(df_backbone_one) == len(df_backbone), "temperature merge changed the number of yield rows"

In [347]:
df_backbone_one.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Crop                  896 non-null    object 
 1   District              896 non-null    object 
 2   Year                  896 non-null    int64  
 3   Season                896 non-null    object 
 4   Area(Hectare)         896 non-null    int64  
 5   Production(Tonne)     896 non-null    float64
 6   Yield(Tonne/Hectare)  896 non-null    float64
 7   Annual_Temp           896 non-null    float64
dtypes: float64(3), int64(2), object(3)
memory usage: 56.1+ KB


In [348]:
df_fertilizer["District"].unique()

array(['srikakulam', 'visakhapatanam', 'east_godavari', 'west_godavari',
       'krishna', 'guntur', 'spsr_nellore', 'kurnool', 'anantapur',
       'kadapa', 'chittoor'], dtype=object)

In [349]:
set(df_fertilizer["District"]) - set(df_backbone_one["District"])

set()

In [350]:
set(df_backbone_one["District"]) - set(df_fertilizer["District"])

{'prakasam', 'vizianagaram'}

In [351]:
set(df_fertilizer["Year"]) - set(df_backbone_one["Year"])

set()

In [352]:
set(df_backbone_one["Year"]) - set(df_fertilizer["Year"])

set()

In [353]:
df_fertilizer.head()

Unnamed: 0,Dist_Code,Year,State_Code,State_Name,District,Fertilizer(KG_per_hectare)
0,44,2014,1,Andhra Pradesh,srikakulam,183.22
1,44,2015,1,Andhra Pradesh,srikakulam,167.77
2,44,2016,1,Andhra Pradesh,srikakulam,168.19
3,44,2017,1,Andhra Pradesh,srikakulam,174.21
4,44,2018,1,Andhra Pradesh,srikakulam,187.29


In [354]:
## Attach the fertilizer usage to every row, matching on (Year, District).
## Only the join keys and the fertilizer column are taken from df_fertilizer.
## validate= makes pandas raise if df_fertilizer holds more than one row per (Year, District),
## which would silently duplicate rows.
fertilizer_columns = ["Year", "District", "Fertilizer(KG_per_hectare)"]
df_backbone_two = df_backbone_one.merge(
    right=df_fertilizer[fertilizer_columns],
    on=["Year", "District"],
    how="outer",
    validate="many_to_one",
)
## An outer join also keeps keys that exist only in df_fertilizer; such rows would have no crop data.
assert len(df_backbone_two) == len(df_backbone_one), "fertilizer merge changed the number of rows"

In [355]:
df_backbone_two.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Crop                        896 non-null    object 
 1   District                    896 non-null    object 
 2   Year                        896 non-null    int64  
 3   Season                      896 non-null    object 
 4   Area(Hectare)               896 non-null    int64  
 5   Production(Tonne)           896 non-null    float64
 6   Yield(Tonne/Hectare)        896 non-null    float64
 7   Annual_Temp                 896 non-null    float64
 8   Fertilizer(KG_per_hectare)  750 non-null    float64
dtypes: float64(4), int64(2), object(3)
memory usage: 63.1+ KB


In [356]:
df_backbone_two.head()

Unnamed: 0,Crop,District,Year,Season,Area(Hectare),Production(Tonne),Yield(Tonne/Hectare),Annual_Temp,Fertilizer(KG_per_hectare)
0,bajra,anantapur,2014,kharif,1752,1028.0,0.59,30.5,115.13
1,cotton,anantapur,2014,kharif,73734,103779.0,1.41,30.5,115.13
2,groundnut,anantapur,2014,kharif,550794,148714.0,0.27,30.5,115.13
3,groundnut,anantapur,2014,kharif,122578,72811.0,0.59,30.5,115.13
4,maize,anantapur,2014,kharif,16839,42687.0,2.54,30.5,115.13


In [357]:
df_backbone_two.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Crop                        896 non-null    object 
 1   District                    896 non-null    object 
 2   Year                        896 non-null    int64  
 3   Season                      896 non-null    object 
 4   Area(Hectare)               896 non-null    int64  
 5   Production(Tonne)           896 non-null    float64
 6   Yield(Tonne/Hectare)        896 non-null    float64
 7   Annual_Temp                 896 non-null    float64
 8   Fertilizer(KG_per_hectare)  750 non-null    float64
dtypes: float64(4), int64(2), object(3)
memory usage: 63.1+ KB


In [360]:
df_rainfall["District"].unique()

array(['srikakulam', 'visakhapatanam', 'east_godavari', 'west_godavari',
       'krishna', 'guntur', 'spsr_nellore', 'kurnool', 'anantapur',
       'kadapa', 'chittoor'], dtype=object)

In [366]:
df_rainfall.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 77 entries, 0 to 76
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Dist Code                     77 non-null     int64  
 1   Year                          77 non-null     int64  
 2   State Code                    77 non-null     int64  
 3   State Name                    77 non-null     object 
 4   District                      77 non-null     object 
 5   ANNUAL_RAINFALL(Millimeters)  77 non-null     float64
dtypes: float64(1), int64(3), object(2)
memory usage: 3.7+ KB


In [361]:
set(df_backbone_two["District"].unique()) - set(df_rainfall["District"].unique())

{'prakasam', 'vizianagaram'}

In [362]:
set(df_rainfall["District"].unique()) - set(df_backbone_two["District"].unique())

set()

In [368]:
set(df_backbone_two["Year"]) - set(df_rainfall["Year"])

set()

In [369]:
## Attach the annual rainfall to every row, matching on (Year, District).
## Only the join keys and the rainfall column are taken from df_rainfall.
## validate= makes pandas raise if df_rainfall holds more than one row per (Year, District),
## which would silently duplicate rows.
rainfall_columns = ["Year", "District", "ANNUAL_RAINFALL(Millimeters)"]
df_backbone_three = df_backbone_two.merge(
    right=df_rainfall[rainfall_columns],
    on=["Year", "District"],
    how="outer",
    validate="many_to_one",
)
## An outer join also keeps keys that exist only in df_rainfall; such rows would have no crop data.
assert len(df_backbone_three) == len(df_backbone_two), "rainfall merge changed the number of rows"

In [370]:
df_backbone_three.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 10 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Crop                          896 non-null    object 
 1   District                      896 non-null    object 
 2   Year                          896 non-null    int64  
 3   Season                        896 non-null    object 
 4   Area(Hectare)                 896 non-null    int64  
 5   Production(Tonne)             896 non-null    float64
 6   Yield(Tonne/Hectare)          896 non-null    float64
 7   Annual_Temp                   896 non-null    float64
 8   Fertilizer(KG_per_hectare)    750 non-null    float64
 9   ANNUAL_RAINFALL(Millimeters)  750 non-null    float64
dtypes: float64(5), int64(2), object(3)
memory usage: 70.1+ KB


In [375]:
## Fill the missing fertilizer / rainfall entries with each column's overall mean
## NOTE(review): the gaps look like they belong to the districts absent from the fertilizer and
## rainfall tables, so those districts get one constant value per column - confirm this is intended.
gap_columns = ["Fertilizer(KG_per_hectare)", "ANNUAL_RAINFALL(Millimeters)"]
gap_column_means = df_backbone_three[gap_columns].mean()
df_backbone_three[gap_columns] = df_backbone_three[gap_columns].fillna(gap_column_means)

In [377]:
df_backbone_three.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 10 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Crop                          896 non-null    object 
 1   District                      896 non-null    object 
 2   Year                          896 non-null    int64  
 3   Season                        896 non-null    object 
 4   Area(Hectare)                 896 non-null    int64  
 5   Production(Tonne)             896 non-null    float64
 6   Yield(Tonne/Hectare)          896 non-null    float64
 7   Annual_Temp                   896 non-null    float64
 8   Fertilizer(KG_per_hectare)    896 non-null    float64
 9   ANNUAL_RAINFALL(Millimeters)  896 non-null    float64
dtypes: float64(5), int64(2), object(3)
memory usage: 70.1+ KB


In [379]:
df_backbone_three.head(5)

Unnamed: 0,Crop,District,Year,Season,Area(Hectare),Production(Tonne),Yield(Tonne/Hectare),Annual_Temp,Fertilizer(KG_per_hectare),ANNUAL_RAINFALL(Millimeters)
0,bajra,anantapur,2014,kharif,1752,1028.0,0.59,30.5,115.13,575.0
1,cotton,anantapur,2014,kharif,73734,103779.0,1.41,30.5,115.13,575.0
2,groundnut,anantapur,2014,kharif,550794,148714.0,0.27,30.5,115.13,575.0
3,groundnut,anantapur,2014,kharif,122578,72811.0,0.59,30.5,115.13,575.0
4,maize,anantapur,2014,kharif,16839,42687.0,2.54,30.5,115.13,575.0


In [380]:
df_backbone_three.to_csv("CleanedData/final_data.csv", index=False)

In [381]:
df = pd.read_csv("CleanedData/final_data.csv")

In [382]:
df.head()

Unnamed: 0,Crop,District,Year,Season,Area(Hectare),Production(Tonne),Yield(Tonne/Hectare),Annual_Temp,Fertilizer(KG_per_hectare),ANNUAL_RAINFALL(Millimeters)
0,bajra,anantapur,2014,kharif,1752,1028.0,0.59,30.5,115.13,575.0
1,cotton,anantapur,2014,kharif,73734,103779.0,1.41,30.5,115.13,575.0
2,groundnut,anantapur,2014,kharif,550794,148714.0,0.27,30.5,115.13,575.0
3,groundnut,anantapur,2014,kharif,122578,72811.0,0.59,30.5,115.13,575.0
4,maize,anantapur,2014,kharif,16839,42687.0,2.54,30.5,115.13,575.0
