# Creating and testing the model

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from azureml.core import Workspace, Dataset
import json

In [2]:
# Load the crop training data from the CSV that sits next to this notebook.
CROP_DATA_PATH = "./crop-data.csv"
crop_df = pd.read_csv(CROP_DATA_PATH)

# Show the first rows as a quick sanity check of the columns that were read.
crop_df.head()

Unnamed: 0,index,N,P,K,temperature,humidity,ph,rainfall,time
0,0,90,42,43,20.879744,82.002744,6.502985,202.935536,175
1,1,85,58,41,21.770462,80.319644,7.038096,226.655537,124
2,2,60,55,44,23.004459,82.320763,7.840207,263.964248,104
3,3,74,35,40,26.491096,80.158363,6.980401,242.864034,155
4,4,78,42,42,20.130175,81.604873,7.628473,262.71734,112


In [3]:
# Fit a linear regression that predicts the "time" column from every other
# column of crop_df, then report R^2 on a held-out split.
#
# NOTE(review): the crop_df preview shows "Unnamed: 0" and "index" columns
# that look like row counters; they are kept as features here because the
# prediction cell builds its input from the same column list -- confirm that
# training on them is intended.
TARGET_COLUMN = "time"

X = crop_df.drop(columns=[TARGET_COLUMN])
y = crop_df[TARGET_COLUMN]

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# (and therefore the reported score) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

crop_reg = LinearRegression()
crop_reg.fit(X_train, y_train)

y_pred = crop_reg.predict(X_test)

print(f"r2 score: {r2_score(y_test, y_pred)}")

r2 score: 0.9352914731395665


# Predicting optimum harvest time

In [4]:
# Azure ML workspace coordinates. The values below are placeholders: fill them
# in for your own subscription before running this cell.
subscription_id = '<subscription_id>'
resource_group = '<resource_group_name>'
workspace_name = '<azure_ml_workspace_name>'

# Handle to the workspace; the next cell uses it to look up the dataset.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

In [5]:
# Fetch the registered dataset from the workspace and load it into pandas.
# Each row's "Line" column is parsed below as a JSON document with a "Body"
# object of readings.
dataset = Dataset.get_by_name(workspace, name='<name>')

json_df = dataset.to_pandas_dataframe()

# Payload keys whose names differ from the training columns.
SENSOR_TO_FEATURE = {"nitrogen": "N", "phosphorus": "P", "potassium": "K"}

# Columns of the training file that look like row counters rather than
# measurements (see the crop_df preview). The payload cannot supply them, so
# they are pinned to 0 -- assumes the model should ignore them; TODO confirm.
ROW_ID_COLUMNS = ("Unnamed: 0", "index")

# Collapse all messages into a single reading. Rows are processed in order,
# so a value from a later message overrides the same key from an earlier one.
# Each value is wrapped in a list so the resulting frame has exactly one row.
data = {}
for line in json_df["Line"]:
    body = json.loads(line)["Body"]
    for key, value in body.items():
        data[SENSOR_TO_FEATURE.get(key, key)] = [value]

# The model was fitted on every column of crop_df except the last ("time").
cols = crop_df.columns.to_list()
feature_cols = cols[:-1]

for id_col in ROW_ID_COLUMNS:
    if id_col in feature_cols:
        data[id_col] = [0]

# Fail loudly instead of letting pandas fill absent features with NaN, which
# the regressor would reject with a far less helpful message.
missing = [col for col in feature_cols if col not in data]
if missing:
    raise ValueError(f"IoT payload is missing model features: {missing}")

# `columns=` both orders the features as in training and drops extra keys.
iot_df = pd.DataFrame(data, columns=feature_cols)

iot_pred = crop_reg.predict(iot_df)

print(f"To be harvested in {round(iot_pred[0])} days")


To be harvested in 187 days
