In [1]:
# Load the files crops_reduced.csv and weather_clean.csv, which contain the relevant portion of the data
# from the weather and crop recommendations datasets. 

import pandas as pd

# Read both input tables in a fixed order (crops first, then weather) and
# preview the first rows of each.
crops, weather = (
    pd.read_csv(csv_name)
    for csv_name in ("crops_reduced.csv", "weather_clean.csv")
)
display(crops.head(), weather.head())

Unnamed: 0,temperature,humidity,rainfall,label
0,20.879744,82.002744,202.935536,rice
1,21.770462,80.319644,226.655537,rice
2,23.004459,82.320763,263.964248,rice
3,26.491096,80.158363,242.864034,rice
4,20.130175,81.604873,262.71734,rice


Unnamed: 0,ds,station,TMP,HUM,LLU
0,2004-01-01 00:00:00,Belisario,9.93,98.06,0.0
1,2004-01-01 01:00:00,Belisario,9.14,98.47,0.1
2,2004-01-01 02:00:00,Belisario,8.71,98.65,0.0
3,2004-01-01 03:00:00,Belisario,8.63,99.03,0.0
4,2004-01-01 04:00:00,Belisario,10.18,86.85,0.0


In [2]:
# Average the temperature (TMP), humidity (HUM) and precipitation (LLU)
# readings for each station.

# Hours in a nominal 30-day month; used to turn an average hourly
# precipitation rate (mm/h) into an average monthly total (mm/month).
HOURS_PER_MONTH = 30 * 24

# One groupby pass yields all three per-station means, indexed by station.
station_means = weather.groupby("station")[["TMP", "HUM", "LLU"]].mean()
temp_district = station_means["TMP"]
hum_district = station_means["HUM"]

# Transform precipitation per hour (mm/h) to precipitation per month (mm/month).
llu_district = station_means["LLU"] * HOURS_PER_MONTH

# weather_sum holds the average temperature, average humidity and the monthly
# precipitation for each station.
weather_sum = station_means.assign(LLU=llu_district)
display(weather_sum.head())
print("\n shape:")
display(weather_sum.shape)

# Average the parameters for each crop.
crops_sum = crops.groupby("label").mean()
display(crops_sum.head())
print("\n shape:")
display(crops_sum.shape)

# Move the group keys ("station" / "label") out of the index into ordinary
# columns, so they appear as columns when the two tables are combined later.
weather_sum = weather_sum.reset_index()
crops_sum = crops_sum.reset_index()


Unnamed: 0_level_0,TMP,HUM,LLU
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Belisario,13.987365,70.030197,97.996124
Carapungo,14.814184,73.019845,60.04762
Centro,14.597383,67.966553,103.405138
Cotocollao,13.986501,72.790882,67.762174
ElCamal,13.991498,70.345796,105.427547



 shape:


(8, 3)

Unnamed: 0_level_0,temperature,humidity,rainfall
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
apple,22.630942,92.333383,112.654779
banana,27.376798,80.358123,104.62698
blackgram,29.97334,65.118426,67.884151
chickpea,18.872847,16.860439,80.058977
coconut,27.409892,94.844272,175.686646



 shape:


(22, 3)

In [4]:
# ====================================================
# Match each district to the best recommendation based on each weather condition
# ====================================================

# Summary statistics of the station table and of the crop table.
display(weather_sum.describe(), crops_sum.describe())

# Cartesian product of the two tables: one row per (station, crop) pair.
cross = weather_sum.merge(crops_sum, how="cross")

# For every pair, store the absolute gap between the crop's average value and
# the station's average value, one diff_* column per weather variable.
for crop_col, station_col in (
    ("temperature", "TMP"),
    ("humidity", "HUM"),
    ("rainfall", "LLU"),
):
    cross["diff_" + crop_col] = (cross[crop_col] - cross[station_col]).abs()

# Label-based, end-inclusive slice: previews rows 1 through 10 (row 0 is not shown).
display(cross.loc[1:10])

Unnamed: 0,TMP,HUM,LLU
count,8.0,8.0,8.0
mean,14.527063,71.577803,92.462523
std,1.272623,2.071205,24.185587
min,12.40142,67.966553,60.04762
25%,13.987149,70.266896,68.410505
50%,14.294441,71.942072,100.700631
75%,15.100429,72.848123,105.99988
max,16.478991,74.585004,128.718084


Unnamed: 0,temperature,humidity,rainfall
count,22.0,22.0,22.0
mean,25.616244,71.481779,103.463655
std,3.650425,22.411186,51.964501
min,18.872847,16.860439,24.689952
25%,22.996627,60.353581,68.316071
50%,25.249426,80.100798,99.665748
75%,28.081631,88.969121,135.134574
max,33.723859,94.844272,236.181114


Unnamed: 0,station,TMP,HUM,LLU,label,temperature,humidity,rainfall,diff_temperature,diff_humidity,diff_rainfall
1,Belisario,13.987365,70.030197,97.996124,banana,27.376798,80.358123,104.62698,13.389433,10.327926,6.630856
2,Belisario,13.987365,70.030197,97.996124,blackgram,29.97334,65.118426,67.884151,15.985975,4.911771,30.111973
3,Belisario,13.987365,70.030197,97.996124,chickpea,18.872847,16.860439,80.058977,4.885482,53.169757,17.937147
4,Belisario,13.987365,70.030197,97.996124,coconut,27.409892,94.844272,175.686646,13.422527,24.814075,77.690522
5,Belisario,13.987365,70.030197,97.996124,coffee,25.540477,58.869846,158.066295,11.553112,11.16035,60.070171
6,Belisario,13.987365,70.030197,97.996124,cotton,23.988958,79.843474,80.398043,10.001593,9.813278,17.598081
7,Belisario,13.987365,70.030197,97.996124,grapes,23.849575,81.875228,69.611829,9.86221,11.845031,28.384295
8,Belisario,13.987365,70.030197,97.996124,jute,24.958376,79.639864,174.792798,10.971011,9.609668,76.796673
9,Belisario,13.987365,70.030197,97.996124,kidneybeans,20.115085,21.605357,105.919778,6.12772,48.42484,7.923653
10,Belisario,13.987365,70.030197,97.996124,lentil,24.509052,64.804785,45.680454,10.521687,5.225412,52.31567


In [5]:
# Recommend a crop for each station from temperature alone: keep, per station,
# the row of `cross` with the smallest diff_temperature.

labels = {
    'label': 'recommendation',
    'temperature': 'temp_crop',
    'humidity': 'hum_crop',
    'rainfall': 'rain_crop',
}
closest_by_temp = cross.groupby("station")["diff_temperature"].idxmin()
rec_temp = (
    cross.loc[closest_by_temp]
    .drop(columns=['diff_temperature', 'diff_humidity', 'diff_rainfall'])  # helper columns, not part of the report
    .rename(columns=labels)
)
print("\n Recommendation based on average temperature: ")
display(rec_temp)


 Recommendation based on average temperature: 


Unnamed: 0,station,TMP,HUM,LLU,recommendation,temp_crop,hum_crop,rain_crop
3,Belisario,13.987365,70.030197,97.996124,chickpea,18.872847,16.860439,80.058977
25,Carapungo,14.814184,73.019845,60.04762,chickpea,18.872847,16.860439,80.058977
47,Centro,14.597383,67.966553,103.405138,chickpea,18.872847,16.860439,80.058977
69,Cotocollao,13.986501,72.790882,67.762174,chickpea,18.872847,16.860439,80.058977
91,ElCamal,13.991498,70.345796,105.427547,chickpea,18.872847,16.860439,80.058977
113,Guamaní,12.40142,74.585004,128.718084,chickpea,18.872847,16.860439,80.058977
135,LosChillos,15.959165,71.613835,107.716879,chickpea,18.872847,16.860439,80.058977
157,Tumbaco,16.478991,72.270308,68.626615,chickpea,18.872847,16.860439,80.058977


In [6]:
# Recommend a crop for each station from humidity alone: keep, per station,
# the row of `cross` with the smallest diff_humidity.
labels = {
    'label': 'recommendation',
    'temperature': 'temp_crop',
    'humidity': 'hum_crop',
    'rainfall': 'rain_crop',
}
closest_by_hum = cross.groupby("station")["diff_humidity"].idxmin()
rec_hum = (
    cross.loc[closest_by_hum]
    .drop(columns=['diff_temperature', 'diff_humidity', 'diff_rainfall'])  # helper columns, not part of the report
    .rename(columns=labels)
)
print("\n Recommendation based on average humidity: ")
display(rec_hum)


 Recommendation based on average humidity: 


Unnamed: 0,station,TMP,HUM,LLU,recommendation,temp_crop,hum_crop,rain_crop
2,Belisario,13.987365,70.030197,97.996124,blackgram,29.97334,65.118426,67.884151
30,Carapungo,14.814184,73.019845,60.04762,jute,24.958376,79.639864,174.792798
46,Centro,14.597383,67.966553,103.405138,blackgram,29.97334,65.118426,67.884151
74,Cotocollao,13.986501,72.790882,67.762174,jute,24.958376,79.639864,174.792798
90,ElCamal,13.991498,70.345796,105.427547,blackgram,29.97334,65.118426,67.884151
118,Guamaní,12.40142,74.585004,128.718084,jute,24.958376,79.639864,174.792798
134,LosChillos,15.959165,71.613835,107.716879,blackgram,29.97334,65.118426,67.884151
156,Tumbaco,16.478991,72.270308,68.626615,blackgram,29.97334,65.118426,67.884151


In [7]:
# Recommend a crop for each station from rainfall alone: keep, per station,
# the row of `cross` with the smallest diff_rainfall.
labels = {
    'label': 'recommendation',
    'temperature': 'temp_crop',
    'humidity': 'hum_crop',
    'rainfall': 'rain_crop',
}
closest_by_rain = cross.groupby("station")["diff_rainfall"].idxmin()
rec_rain = (
    cross.loc[closest_by_rain]
    .drop(columns=['diff_temperature', 'diff_humidity', 'diff_rainfall'])  # helper columns, not part of the report
    .rename(columns=labels)
)
print("\n Recommendation based on average rainfall: ")
display(rec_rain)


 Recommendation based on average rainfall: 


Unnamed: 0,station,TMP,HUM,LLU,recommendation,temp_crop,hum_crop,rain_crop
12,Belisario,13.987365,70.030197,97.996124,mango,31.20877,50.156573,94.704515
24,Carapungo,14.814184,73.019845,60.04762,blackgram,29.97334,65.118426,67.884151
45,Centro,14.597383,67.966553,103.405138,banana,27.376798,80.358123,104.62698
68,Cotocollao,13.986501,72.790882,67.762174,blackgram,29.97334,65.118426,67.884151
97,ElCamal,13.991498,70.345796,105.427547,kidneybeans,20.115085,21.605357,105.919778
127,Guamaní,12.40142,74.585004,128.718084,papaya,33.723859,92.403388,142.627839
151,LosChillos,15.959165,71.613835,107.716879,pomegranate,21.837842,90.125504,107.528442
156,Tumbaco,16.478991,72.270308,68.626615,blackgram,29.97334,65.118426,67.884151
