In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Load the dataset
df = pd.read_csv('TASK-ML-INTERN.csv')

In [None]:
df

In [4]:
# Separate spectral data and target variable
spectral_data = df.iloc[:, 1:-1]  #first column is ID, last is target
vomitoxin_ppb = df['vomitoxin_ppb']

In [5]:
# Normalize spectral data
scaler = StandardScaler()
normalized_data = scaler.fit_transform(spectral_data)

In [6]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(normalized_data, vomitoxin_ppb, test_size=0.2, random_state=42)

In [7]:
# Perform PCA for dimensionality reduction
pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [8]:
# Train Random Forest model
rf_model = RandomForestRegressor(n_estimators=200, max_depth=None, random_state=42)
rf_model.fit(X_train_pca, y_train)

RandomForestRegressor(n_estimators=200, random_state=42)

In [9]:
# Evaluate model
y_pred_rf = rf_model.predict(X_test_pca)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)
print(f"Random Forest - MAE: {mae_rf}, RMSE: {rmse_rf}, R²: {r2_rf}")

Random Forest - MAE: 2106.8558999999996, RMSE: 5099.632220612973, R²: 0.9069654091910249
