In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score,confusion_matrix,classification_report,accuracy_score

In [None]:
# Read the raw dataset and preview its first rows.
# NOTE(review): absolute "/content/..." path; presumably a Colab upload
# location - confirm before running elsewhere.
csv_path="/content/mission7.csv"
data=pd.read_csv(csv_path)
data.head()

In [None]:
# Turn the "Yes"/"No" target into 1/0.
# Series.map sends any value that is not a key of the mapping to NaN, so
# running this cell a second time (when the column already holds 1/0) would
# blank the column out.
thrives_codes={"Yes":1,"No":0}
data["Thrives"]=data["Thrives"].map(thrives_codes)

In [None]:
# Show the column labels of the loaded frame.
data.columns

In [None]:
# Integer-encode the two categorical columns, keeping one fitted encoder per
# column in `le` so the same mapping can be applied to new rows later.
# NOTE(review): re-running this cell refits the encoders on the already
# encoded integers; restart and run all instead of re-running it.
col=['Soil Type','Plant Species']
le={name:LabelEncoder() for name in col}
for name,encoder in le.items():
  data[name]=encoder.fit_transform(data[name])

In [None]:
# Rescale the four numeric feature columns with a MinMaxScaler and keep the
# fitted scaler for reuse on new rows.
# NOTE(review): the name `min` shadows the builtin min(); it is kept because
# the prediction function further down calls `min.transform`.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split, so held-out rows influence the scaling.
scaled_cols=['Sunlight (hours/day)', 'Water Supply (liters/week)','Temperature (Â°C)', 'pH Level']
min=MinMaxScaler()
data[scaled_cols]=min.fit_transform(data[scaled_cols])

In [None]:
# Preview the first rows of the frame again.
data.head()

In [None]:
# Split off the target, then hold out 20% of the rows for evaluation with a
# fixed seed so the split is the same on every run.
y=data["Thrives"]
x=data.drop(columns="Thrives")
x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Logistic-regression baseline: fit on the training split and predict the
# held-out split.
lr=LogisticRegression().fit(x_train,y_train)
pred_lr=lr.predict(x_test)
# Print the same six reports, in the same order, one per line.
for report in (mean_squared_error,mean_absolute_error,r2_score,
               confusion_matrix,classification_report,accuracy_score):
  print(report(y_test,pred_lr))

In [None]:
# Decision-tree model. `dt` is also the model the prediction function further
# down calls, so it is seeded (same seed as the train/test split): without a
# random_state the tree can differ from one run to the next.
dt=DecisionTreeClassifier(random_state=42)
dt.fit(x_train,y_train)
pred_dt=dt.predict(x_test)
# Regression-style scores on the class labels, followed by the
# classification reports.
print(mean_squared_error(y_test,pred_dt))
print(mean_absolute_error(y_test,pred_dt))
print(r2_score(y_test,pred_dt))
print(confusion_matrix(y_test,pred_dt))
print(classification_report(y_test,pred_dt))
print(accuracy_score(y_test,pred_dt))

In [None]:
# Random-forest model. Bootstrapping and feature sub-sampling are random, so
# the forest is seeded (same seed as the train/test split) to make the scores
# below reproducible across runs.
rf=RandomForestClassifier(random_state=42)
rf.fit(x_train,y_train)
pred_rf=rf.predict(x_test)
# Regression-style scores on the class labels, followed by the
# classification reports.
print(mean_squared_error(y_test,pred_rf))
print(mean_absolute_error(y_test,pred_rf))
print(r2_score(y_test,pred_rf))
print(confusion_matrix(y_test,pred_rf))
print(classification_report(y_test,pred_rf))
print(accuracy_score(y_test,pred_rf))

In [None]:
# Overlay each model's predicted class against the true class on one figure.
# Every series gets a label and the figure a legend so the three colours can
# be told apart, and the title describes the plot.
# NOTE(review): assuming the target only holds 0/1, all markers fall on the
# same few points and the series drawn last hides the others; the confusion
# matrices printed in the model cells carry the actual counts.
plt.figure(figsize=(10,6))
sns.scatterplot(x=y_test,y=pred_lr,color='black',label="Logistic Regression")
sns.scatterplot(x=y_test,y=pred_dt,color='green',label="Decision Tree")
sns.scatterplot(x=y_test,y=pred_rf,color='blue',label="Random Forest")
plt.title("Predicted vs. actual class by model")
plt.xlabel("y_test")
plt.ylabel("pred_model")
plt.legend(title="Model")
plt.show()


In [None]:
# Exhaustive 5-fold search over the iteration cap of logistic regression;
# prints the winning setting and displays its mean cross-validated score.
params=dict(max_iter=[100,200,300])
grid=GridSearchCV(estimator=LogisticRegression(),param_grid=params,cv=5)
grid.fit(x_train,y_train)
print(grid.best_params_)
grid.best_score_


In [None]:
# 5-fold search over the random forest's max_depth.
# The grid has only three candidates, so n_iter is set to the grid size: the
# default n_iter=10 is larger than the grid and makes scikit-learn warn before
# falling back to the grid size. Both the forest and the sampler are seeded so
# the reported best setting and score are reproducible.
# NOTE(review): the name `random` would shadow the stdlib module if that were
# imported; it is kept because other cells may refer to it.
par={'max_depth':[100,200,300]}
random=RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    par,
    n_iter=len(par['max_depth']),
    cv=5,
    random_state=42,
)
random.fit(x_train,y_train)
print(random.best_params_)
random.best_score_

In [None]:
# Show the column labels again (presumably as a reference for the feature
# names used in the prediction function below).
data.columns

In [None]:
def haneen(ST,Sun,WS,T,pH,PS):
  """Predict whether a plant thrives under one set of growing conditions.

  Builds a one-row frame from the arguments, applies the per-column label
  encoders in ``le`` and the fitted scaler ``min`` from earlier cells, and
  classifies the row with the decision tree ``dt``.

  Args:
    ST: Soil type label, encoded with ``le['Soil Type']``.
    Sun: Value for the 'Sunlight (hours/day)' column.
    WS: Value for the 'Water Supply (liters/week)' column.
    T: Value for the temperature column.
    pH: Value for the 'pH Level' column.
    PS: Plant species label, encoded with ``le['Plant Species']``.

  Returns:
    "Yes" when the predicted class is 1, otherwise "No". If any step fails
    (for example a label the encoder has not seen), the function does not
    raise; it returns the text "Error: <message>" so the UI can show it.
  """
  numeric_cols=['Sunlight (hours/day)', 'Water Supply (liters/week)','Temperature (Â°C)', 'pH Level']
  try:
    input_data=pd.DataFrame({
        "Soil Type":[ST],
        "Sunlight (hours/day)":[Sun],
        "Water Supply (liters/week)":[WS],
        "Temperature (Â°C)":[T],
        "pH Level":[pH],
        "Plant Species":[PS]
    })
    for i in col:
      input_data[i]=le[i].transform(input_data[i])
    input_data[numeric_cols]=min.transform(input_data[numeric_cols])
    # Put the columns in the order the model was fitted on instead of relying
    # on the hand-typed order above; skipped when the model does not expose
    # feature names.
    fitted_order=getattr(dt,"feature_names_in_",None)
    if fitted_order is not None:
      input_data=input_data[list(fitted_order)]
    result=dt.predict(input_data)
    return "Yes" if result[0]==1 else "No"
  except Exception as e:
    # Always hand back text (never the exception object) and prefix it so a
    # failure cannot be mistaken for a prediction.
    return f"Error: {e}"
# Build the input form for `haneen` and start the Gradio app.
# The dropdown choices are read from the fitted label encoders rather than
# typed by hand, so every option offered is one the encoder can transform.
gr.Interface(
    fn=haneen,
    inputs= [
        gr.Dropdown(le["Soil Type"].classes_.tolist(),label="Soil Type"),
        gr.Number(label="Sunlight (hours/day)"),
        gr.Number(label="Water Supply (liters/week)"),
        gr.Number(label="Temperature (Â°C)"),
        gr.Number(label="pH Level"),
        gr.Dropdown(le["Plant Species"].classes_.tolist(),label="Plant Species")
    ],
     outputs=gr.Textbox(label="prediction")
).launch()