In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [2]:
# Read the dataset and discard rows whose TS value is missing
df = pd.read_csv("nirscan_nano.csv").dropna(subset=["TS"])

# IQR rule on TS: keep values inside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] (bounds inclusive)
Q1, Q3 = df["TS"].quantile([0.25, 0.75])
IQR = Q3 - Q1
df = df[df["TS"].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)]

In [3]:
# The target is the TS column; every remaining column is used as a feature
y = df["TS"]
X = df.drop(columns=["TS"])

# Standardize the features: fit the scaler on X, then transform that same X
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)



In [4]:
# Split into train and test.
# A fixed seed makes the hold-out split -- and every metric computed from it --
# reproducible across "Restart Kernel -> Run All".
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=SEED
)

# Find best number of PLS components: mean cross-validated R² on the training
# split only, so the test split stays untouched for the final evaluation.
CV_FOLDS = 5
MAX_COMPONENTS = 20  # upper limit of the search; raise it if best_n ends up at this cap

# PLSRegression cannot extract more components than min(n_samples, n_features),
# and each cross-validation fit only sees the training part of one fold.
n_fit_samples = X_train.shape[0] - int(np.ceil(X_train.shape[0] / CV_FOLDS))
max_components = min(MAX_COMPONENTS, X_train.shape[1], n_fit_samples)

best_r2 = -np.inf  # R² has no lower bound, so -1 is not a safe starting value
best_n = 1

for n in range(1, max_components + 1):
    mean_r2 = cross_val_score(
        PLSRegression(n_components=n), X_train, y_train, cv=CV_FOLDS, scoring="r2"
    ).mean()
    if mean_r2 > best_r2:
        best_r2, best_n = mean_r2, n

# NOTE: the model-fitting cell that follows sets n_components explicitly;
# compare that value with best_n reported here.
print(f"Best n_components by {CV_FOLDS}-fold CV: {best_n} (mean R² = {best_r2:.4f})")


In [5]:
# Fit the final PLS regression (9 components) on the training split
pls = PLSRegression(n_components=9).fit(X_train, y_train)

# Predictions for the held-out test split
y_pred = pls.predict(X_test)

In [6]:
# Score the test-split predictions against the true TS values
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = square root of the MSE

# Report the three metrics, one per line
for label, value in (("R²:", r2), ("RMSE:", rmse), ("MAE:", mae)):
    print(label, value)


R²: 0.9742929049547578
RMSE: 2.4524836272868025
MAE: 1.9925280710827824


In [7]:
def predict_from_csv(file_path, output_file):
    """Predict TS for the samples in a CSV file and save the predictions as CSV.

    Relies on three notebook-level objects: ``X`` (the training feature frame,
    used only for its column names and order), ``scaler`` (the fitted scaler)
    and ``pls`` (the fitted model).

    Parameters
    ----------
    file_path
        Path of the CSV to read. A ``TS`` column, if present, is dropped;
        columns not listed in ``X.columns`` are not used.
    output_file
        Path of the CSV to write. It holds a single ``Predicted_TS`` column
        and no index column.

    Raises
    ------
    KeyError
        If the input file lacks any of the columns in ``X.columns``.
    """
    # Read the samples; the target column is optional and never used as input
    samples = pd.read_csv(file_path).drop(columns=["TS"], errors="ignore")

    # Select the training features, in the training column order
    features = samples.loc[:, X.columns]

    # Apply the fitted scaler, then the fitted model
    predicted = pls.predict(scaler.transform(features))

    # Write the predictions as one column, without the index
    result = pd.DataFrame(predicted, columns=["Predicted_TS"])
    result.to_csv(output_file, index=False)
    print(f"Predictions saved to {output_file}")


In [8]:
# Example usage (point file_path at your own CSV of new samples)
predict_from_csv(file_path="input.csv", output_file="predicted_output.csv")

Predictions saved to predicted_output.csv
