In [None]:
# Uploading dataset into colab notebook
# This cell imports `google.colab`, so it only runs where that package is
# available. NOTE(review): files.upload() presumably prompts for a file
# interactively — confirm before using this notebook in a non-interactive run.
from google.colab import files
# The return value is kept in `u`; no other visible cell reads it — the CSV is
# opened by filename in the following cell.
u = files.upload()

In [2]:
# Read the hyperspectral CSV into the pandas DataFrame `df`
import pandas as pd
df = pd.read_csv(filepath_or_buffer='synthetic_hyperspectral_100bands.csv')

In [None]:
# Preview the first five rows of the DataFrame
df.head(n=5)

In [None]:
# Print pandas' summary of the DataFrame (DataFrame.info)
df.info()

In [None]:
# Count the missing values in each column
df.isna().sum()

In [None]:
# Box plot of every column of the DataFrame, drawn with seaborn on a matplotlib axes
import matplotlib.pyplot as plt
import seaborn as sns
fig, ax = plt.subplots(figsize=(15, 5))
sns.boxplot(data=df, ax=ax)
# Turn the x tick labels vertical so the column names do not overlap
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
plt.show()

In [7]:
# Separate the regression target (y) from the predictor columns (X)
y = df.loc[:, 'DON_Concentration']
X = df.drop(columns=['DON_Concentration'])

In [8]:
# Standardize the feature columns with StandardScaler.
# The scaler is fitted on every row of X, so its statistics describe the whole dataset.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Line plot of the column-wise mean of X (one point per feature column)
avg_reflectance = X.mean(axis=0)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(avg_reflectance)
ax.set_title('Average Reflectance Across Wavelength Bands')
ax.set_xlabel('Wavelength Bands')
ax.set_ylabel('Average Reflectance')
plt.show()

In [None]:
# Heatmap of the raw feature matrix X: rows are samples, columns are features
fig, ax = plt.subplots(figsize=(10, 5))
sns.heatmap(X, cmap='summer', ax=ax)
ax.set_title('Heatmap of Spectral Reflectance')
ax.set_xlabel('Wavelength Bands')
ax.set_ylabel('Corn Samples')
plt.show()

In [11]:
# Reduce the standardized features to 10 dimensions with PCA
# (Principal Component Analysis)
from sklearn.decomposition import PCA
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X=X_scaled)

In [None]:
# Report the explained-variance ratio of each of the 10 fitted components
explained_variance = pca.explained_variance_ratio_
variance_report = f'Explained variance by top 10 principal components: {explained_variance}'
print(variance_report)

In [None]:
# Scatter of the first two PCA components, coloured by the target y
fig, ax = plt.subplots(figsize=(10, 5))
pca_points = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='cool')
ax.set_title('PCA - 2D Scatter Plot')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(pca_points, ax=ax, label='DON Concentration')
plt.show()

In [14]:
# Reducing the data dimensions using t-SNE (t-distributed Stochastic Neighbor Embedding)
# t-SNE is stochastic: without a fixed random_state the embedding (and the
# scatter plot built from it) changes on every run, so the seed is pinned here.
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X_scaled)

In [None]:
# Scatter of the two t-SNE components, coloured by the target y
fig, ax = plt.subplots(figsize=(10, 5))
tsne_points = ax.scatter(X_tsne[:, 0], X_tsne[:, 1], c=y, cmap='summer')
ax.set_title('t-SNE - 2D Scatter Plot')
ax.set_xlabel('t-SNE Component 1')
ax.set_ylabel('t-SNE Component 2')
fig.colorbar(tsne_points, ax=ax, label='DON Concentration')
plt.show()

In [16]:
# Splitting the dataset into training (80%) and testing (20%) sets.
# The models are trained on all standardized feature columns, not on the
# PCA / t-SNE projections computed above. The split is taken on the raw X and
# the scaler used for modelling is fitted on the training rows only, so that
# test-set statistics do not leak into the features the models are trained on.
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Separate scaler for modelling; `scaler` / `X_scaled` are left untouched for
# the exploratory cells that use them.
model_scaler = StandardScaler().fit(X_train_raw)
X_train = model_scaler.transform(X_train_raw)
X_test = model_scaler.transform(X_test_raw)

In [None]:
# Fit the baseline MLP regressor (hidden layers of 50 and 25 units) on the training split
MLP_Model = MLPRegressor(
    hidden_layer_sizes=(50, 25),
    max_iter=1000,
    random_state=30,
)
MLP_Model.fit(X=X_train, y=y_train)

In [18]:
# Bring in the regression metrics (MAE, MSE, R2) and predict on the test split
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
y_pred = MLP_Model.predict(X=X_test)

In [None]:
# Score the MLP predictions held in y_pred against y_test and print each metric
import math
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)  # RMSE is the square root of the MSE
r2 = r2_score(y_test, y_pred)
for metric_label, metric_value in (('MAE', mae), ('RMSE', rmse), ('R² Score', r2)):
    print(f'{metric_label}: {metric_value}')

In [None]:
# Scatter of test targets against the predictions in y_pred, with a dashed
# red y = x reference line spanning the min..max of the full target y
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test, y_pred, alpha=0.7)
target_span = [y.min(), y.max()]
ax.plot(target_span, target_span, 'r--')
ax.set_title('Actual vs. Predicted Values')
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
plt.show()

In [21]:
# Search space for GridSearchCV: every combination of these MLP settings is tried
from sklearn.model_selection import GridSearchCV
param_grid = dict(
    hidden_layer_sizes=[(50, 25), (100, 50)],
    activation=['relu', 'tanh'],
    solver=['adam', 'sgd'],
)

In [None]:
# Run the 3-fold cross-validated grid search on the training split,
# ranking candidates by negative mean absolute error
grid_search = GridSearchCV(
    estimator=MLP_Model,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_absolute_error',
)
grid_search.fit(X=X_train, y=y_train)

In [None]:
# Show the parameter combination selected by the grid search
best_params = grid_search.best_params_
best_params_report = f'Best parameters: {best_params}'
print(best_params_report)

In [None]:
# Printing metrics for the GridSearchCV-tuned model.
# Predictions must come from the tuned estimator: reusing `y_pred` would only
# re-print the baseline MLP's scores. GridSearchCV refits the best parameter
# combination on the training data by default (refit=True) and
# grid_search.predict delegates to that refitted estimator.
y_pred_grid = grid_search.predict(X_test)
mae = mean_absolute_error(y_test, y_pred_grid)
rmse = math.sqrt(mean_squared_error(y_test, y_pred_grid))
r2 = r2_score(y_test, y_pred_grid)
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')

In [None]:
# Train an XGBoost regressor on the training split for comparison with the MLP.
# Its parameters are fixed here; no search is run over them.
from xgboost import XGBRegressor
XGB_Model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
XGB_Model.fit(X_train, y_train)

In [None]:
# Printing metrics for the XGBoost model.
# Predictions are taken from XGB_Model itself; reusing `y_pred` would report
# the MLP's scores under the XGBoost heading.
y_pred_xgb = XGB_Model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred_xgb)
rmse = math.sqrt(mean_squared_error(y_test, y_pred_xgb))
r2 = r2_score(y_test, y_pred_xgb)
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')
print(f'R² Score: {r2}')