Q1) How can farming practices be optimised while promoting sustainability and reducing environmental impact?

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway


In [2]:
farmer_df = pd.read_csv('./data/farmer_advisor_dataset.csv')
market_df = pd.read_csv('./data/market_researcher_dataset.csv')

In [3]:
farmer_df.head()

Unnamed: 0,Farm_ID,Soil_pH,Soil_Moisture,Temperature_C,Rainfall_mm,Crop_Type,Fertilizer_Usage_kg,Pesticide_Usage_kg,Crop_Yield_ton,Sustainability_Score
0,1,7.073643,49.145359,26.668157,227.890912,Wheat,131.692844,2.958215,1.57692,51.913649
1,2,6.236931,21.496115,29.325342,244.017493,Soybean,136.370492,19.20477,3.824686,47.159077
2,3,5.922335,19.469042,17.666414,141.110521,Corn,99.72521,11.041066,1.133198,50.148418
3,4,6.84512,27.974234,17.188722,156.785663,Wheat,194.832396,8.806271,8.87054,89.764557
4,5,6.934171,33.637679,23.603899,77.859362,Corn,57.271267,3.747553,8.779317,51.033941


Analysis by Crop Type

In [4]:
farmer_df['Crop_Type'].unique()

array(['Wheat', 'Soybean', 'Corn', 'Rice'], dtype=object)

In [5]:
farmer_df.groupby(['Crop_Type']).size()

Crop_Type
Corn       2455
Rice       2464
Soybean    2559
Wheat      2522
dtype: int64

In [6]:
corn_df = farmer_df[farmer_df['Crop_Type'] == 'Corn'].reset_index()
rice_df = farmer_df[farmer_df['Crop_Type'] == 'Rice']
soybean_df = farmer_df[farmer_df['Crop_Type'] == 'Soybean']
wheat_df = farmer_df[farmer_df['Crop_Type'] == 'Wheat']

crop_names = ['Corn','Rice','Soybean','Wheat']
crop_dfs = [corn_df,rice_df,soybean_df,wheat_df]

Is there a discernable Crop_Yield_ton or Sustainability_Score across the crops?

In [12]:
def plot_line(df, x_value): 
    sns.lineplot(data=df.loc[:,x_value])
    # plt.xlabel('{x_value}')
    # plt.ylabel('Count')
    # plt.title('Value Counts of Categories')
    plt.show()

In [14]:
for crop_name, crop_df in zip(crop_names, crop_dfs):
    # plot_line(crop_df,'Crop_Yield_ton')
    print(F"Average Crop_Yield_ton in {crop_name}: {crop_df.loc[:,'Crop_Yield_ton'].mean()}")
    break

Average Crop_Yield_ton in Corn: 5.532547700050306


Wheat seems to have a smaller Crop_Yield_ton but don't think it is significant

In [None]:
for crop_name, crop_df in zip(crop_names, crop_dfs):
    print(F"Average Sustainability_Score in {crop_name}: {crop_df.loc[:,'Sustainability_Score'].mean()}")

Soybean seems to have lower Sustainability_Score but not sure it is significant

Testing for statistical significance

In [None]:
Crop_Yield_ton_ANOVA = f_oneway(corn_df['Crop_Yield_ton'], rice_df['Crop_Yield_ton'], soybean_df['Crop_Yield_ton'], wheat_df['Crop_Yield_ton'])

In [21]:
print(F"Crop is not statistically significant as p value of the test is: {Crop_Yield_ton_ANOVA.pvalue} >> 0.0.5 in determining Crop_Yield_ton")

Crop is not statistically significant as p value of the test is: 0.4242485393761416 >> 0.0.5 in determining Crop_Yield_ton


In [22]:
Sustainability_Score_ANOVA = f_oneway(corn_df['Sustainability_Score'], rice_df['Sustainability_Score'], soybean_df['Sustainability_Score'], wheat_df['Sustainability_Score'])

In [23]:
print(F"Crop is not statistically significant as p value of the test is: {Sustainability_Score_ANOVA.pvalue} >> 0.0.5 in determining Sustainability_Score")

Crop is not statistically significant as p value of the test is: 0.9656869114798704 >> 0.0.5 in determining Sustainability_Score
