In [1]:
!pip install mlflow

Defaulting to user installation because normal site-packages is not writeable
Collecting mlflow
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow)
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.16.1-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting sqlalchemy<3,>=1.4.0 (from mlflow)
  Downloading sqlalchemy-2.0.41-cp311-cp311-win_amd64.whl.metadata (9.8 kB)
Collecting waitress<4 (from mlflow)
  Downloading waitress-3.0.2-py3-none-any.whl.metadata (5.8 kB)
Collecting cloudpickle<4 (from mlflow-skinny==2.22.0->mlflow)
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2

DEPRECATION: Loading egg at c:\program files\python311\lib\site-packages\vboxapi-1.0-py3.11.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 1.23.5 which is incompatible.

[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
# 📌 Step 1: Import libraries
import os
import time

import joblib
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# 📌 Configuration: every value a reader might want to tune lives here
DATA_PATH = "Crop_recommendation.csv"
EXPERIMENT_NAME = "Crop_Recommendation_Experiment"
# Default local tracking store. Set the MLFLOW_TRACKING_URI environment
# variable to use a different store without editing this cell.
DEFAULT_TRACKING_URI = r"file:///E://UNIVERSITY//Semester 6//Big Data Analytics//LAB//ML_Assignment//ML//mlruns"
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100
MODEL_PATH = "model.pkl"
LABEL_ENCODER_PATH = "label_encoder.pkl"

# 📌 Step 2: Set MLflow Tracking URI
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", DEFAULT_TRACKING_URI))

# 📌 Step 3: Load the crop dataset
df = pd.read_csv(DATA_PATH)

# Remove unnamed columns if they exist.
# The explicit copy makes the label assignment below write to an independent
# frame instead of to a slice of the frame that was just read.
df = df.loc[:, ~df.columns.str.contains('^Unnamed')].copy()

# 📌 Step 4: Encode label (crop name) as integer class ids
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

# 📌 Step 5: Feature & target split
X = df.drop('label', axis=1)
y = df['label']

# 📌 Step 6: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# 📌 Step 7: Start MLflow experiment
mlflow.set_experiment(EXPERIMENT_NAME)

with mlflow.start_run():
    # Model training. Only the fit call is timed, so that "training_time"
    # does not also include prediction and metric computation.
    model = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    duration = time.perf_counter() - start_time

    # Prediction
    y_pred = model.predict(X_test)

    # Metrics (macro average: every class contributes equally)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')

    # ✅ Log to MLflow; parameters come from the same constants used above,
    # so the logged values cannot drift from what was actually trained.
    mlflow.log_param("model_type", "RandomForest")
    mlflow.log_param("n_estimators", N_ESTIMATORS)
    mlflow.log_param("random_state", RANDOM_STATE)
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("training_time", duration)

    # ✅ Save model and label encoder
    mlflow.sklearn.log_model(model, "model")
    joblib.dump(model, MODEL_PATH)
    joblib.dump(le, LABEL_ENCODER_PATH)
    # Attach the encoder to the run as well: without it the logged model's
    # integer predictions cannot be mapped back to crop names.
    mlflow.log_artifact(LABEL_ENCODER_PATH)

    print("✅ Model trained and saved successfully.")




✅ Model trained and saved successfully.


In [6]:

import numpy as np
import pandas as pd

# Read the raw CSV again (Crop_recommendation.csv must sit next to this
# notebook) and show summary statistics for its numeric columns.
df = pd.read_csv(filepath_or_buffer="Crop_recommendation.csv")
df.describe()

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall,Unnamed: 8,Unnamed: 9
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,0.0,0.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,,
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389,,
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267,,
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686,,
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624,,
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508,,
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117,,


In [7]:
# Preview the first five rows of the frame currently bound to `df`.
df.head(n=5)

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall,label,Unnamed: 8,Unnamed: 9
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice,,
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice,,
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice,,
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice,,
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice,,
