How can agriculture be transformed into a resource-efficient and profitable endeavour?

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway


In [2]:
# Load the two raw CSV datasets used throughout the notebook: one with farm-level
# records and one with market records. Paths are relative to the working directory.
farmer_df = pd.read_csv('./data/farmer_advisor_dataset.csv')
market_df = pd.read_csv('./data/market_researcher_dataset.csv')

In [3]:
# Preview the first rows of the farm data to confirm the columns loaded as expected.
farmer_df.head()

Unnamed: 0,Farm_ID,Soil_pH,Soil_Moisture,Temperature_C,Rainfall_mm,Crop_Type,Fertilizer_Usage_kg,Pesticide_Usage_kg,Crop_Yield_ton,Sustainability_Score
0,1,7.073643,49.145359,26.668157,227.890912,Wheat,131.692844,2.958215,1.57692,51.913649
1,2,6.236931,21.496115,29.325342,244.017493,Soybean,136.370492,19.20477,3.824686,47.159077
2,3,5.922335,19.469042,17.666414,141.110521,Corn,99.72521,11.041066,1.133198,50.148418
3,4,6.84512,27.974234,17.188722,156.785663,Wheat,194.832396,8.806271,8.87054,89.764557
4,5,6.934171,33.637679,23.603899,77.859362,Corn,57.271267,3.747553,8.779317,51.033941


In [4]:
# Preview the first ten rows of the market data (Product and Seasonal_Factor are text columns).
market_df.head(10)

Unnamed: 0,Market_ID,Product,Market_Price_per_ton,Demand_Index,Supply_Index,Competitor_Price_per_ton,Economic_Indicator,Weather_Impact_Score,Seasonal_Factor,Consumer_Trend_Index
0,1,Rice,180.251212,196.0859,199.509124,300.549219,1.093636,28.47481,Medium,148.472131
1,2,Rice,420.52797,188.4524,150.789483,492.097798,0.526307,70.978063,High,97.298888
2,3,Wheat,457.260398,171.179384,78.989326,323.003342,1.292393,80.853592,Low,131.113236
3,4,Soybean,237.179113,196.970677,50.464363,232.978384,0.627663,60.676069,Low,95.169417
4,5,Wheat,324.032925,113.165416,145.878647,312.428652,1.491255,45.379516,Low,130.297138
5,6,Corn,389.648205,123.088729,190.906475,181.935439,0.87615,81.369855,Low,77.246215
6,7,Rice,155.895188,70.198811,96.268017,265.91718,1.228814,88.33181,High,107.44704
7,8,Rice,321.726728,138.080562,52.25519,187.675745,1.477785,14.62419,Low,103.239493
8,9,Soybean,447.411773,188.317545,171.829493,210.502956,0.707803,29.150677,Low,141.323695
9,10,Corn,218.127868,58.501856,92.439833,428.672876,0.791494,78.894967,High,80.221156


In [5]:
# Crops to analyse; the order here defines the order of `market_crop_dfs`.
crop_names = ['Corn','Rice','Soybean','Wheat']

# One market frame per crop, each filtered on `Product` and re-indexed from 0.
market_crop_dfs = [
    market_df[market_df['Product'] == product].reset_index(drop=True)
    for product in crop_names
]

# Keep the individual per-crop names available; they refer to the same objects
# that are stored in `market_crop_dfs`.
market_corn_df, market_rice_df, market_soybean_df, market_wheat_df = market_crop_dfs

In [6]:
def label_seasonal_factor(row):
    """Map a row's ``Seasonal_Factor`` label to an ordinal code.

    Parameters
    ----------
    row : mapping-like
        Any object supporting ``row['Seasonal_Factor']`` (e.g. a DataFrame row).

    Returns
    -------
    int or None
        1 for ``'Low'``, 2 for ``'Medium'``, 3 for ``'High'``; ``None`` when the
        value matches none of these labels.
    """
    season = row['Seasonal_Factor']
    # Labels are listed in ascending order so the position doubles as the code.
    for code, label in enumerate(('Low', 'Medium', 'High'), start=1):
        if season == label:
            return code
    return None

In [None]:
# Add derived market features to every per-crop frame (mutates the frames in place).
# Column arithmetic is used instead of a row-wise `apply(axis=1)`: it yields the same
# values without calling a Python lambda per row, and it also works on a frame with
# zero rows, where a row-wise apply does not produce an assignable Series.
for mcd in market_crop_dfs:
    # Mean of the market price and the competitor price per ton.
    mcd['avg_market_price'] = (mcd['Market_Price_per_ton'] + mcd['Competitor_Price_per_ton']) / 2
    # Demand minus supply: positive when demand exceeds supply, negative otherwise.
    mcd['demand_diff'] = mcd['Demand_Index'] - mcd['Supply_Index']
    # Ordinal encoding of Seasonal_Factor is currently disabled:
    # mcd['label_seasonal_factor'] = mcd.apply(label_seasonal_factor, axis=1)


In [8]:
# Print summary statistics for each crop's market frame. The identifier and the two
# raw price columns are dropped first, so prices appear only via `avg_market_price`.
for crop, mcd in zip(crop_names, market_crop_dfs):
    print(
        f"Crop {crop}",
        mcd.drop(columns=['Market_ID','Market_Price_per_ton','Competitor_Price_per_ton']).describe(),
        "\n",
        sep="\n",
    )

Crop Corn
       Demand_Index  Supply_Index  Economic_Indicator  Weather_Impact_Score  \
count   2450.000000   2450.000000         2450.000000           2450.000000   
mean     124.713699    125.097808            0.996569             49.462663   
std       43.614045     43.320147            0.289507             29.089215   
min       50.061781     50.162613            0.500292              0.092426   
25%       87.303863     86.877102            0.747366             24.030069   
50%      123.998307    124.782559            1.003077             48.608678   
75%      162.018556    162.076101            1.247166             74.550329   
max      199.997449    199.915923            1.499887             99.988335   

       Consumer_Trend_Index  avg_market_price  demand_diff  \
count           2450.000000       2450.000000  2450.000000   
mean              98.787840        299.795328    -0.384110   
std               28.848820         84.035632    60.805239   
min               50.039115   