In [None]:
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error, r2_score

import matplotlib.pyplot as plt

In [None]:
# Location of the raw tablet-examination measurements (CSV hosted on GitHub).
filepath = 'https://raw.githubusercontent.com/MrHexeberg/AI-medical-project/refs/heads/main/Tablet%20examination%20-%20Munka1.csv'

# Skip the first line of the file so the following line provides the column names,
# then discard the 'Measurement' column, which is not used anywhere below.
data = pd.read_csv(filepath, skiprows=1).drop(columns='Measurement')

# Preview the first rows.
data.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
data.describe()

In [None]:
# Count the missing values in every column.
data.isna().sum()

In [None]:
# Histograms of the three columns later used as model inputs.
data.hist(
    column=['Press Force (kg)', 'Motor Speed (tablets/min)', 'Particle Size (μm)'],
    figsize=(12, 8),
)

In [None]:
# Histograms of the three columns later used as model targets (drawn in green).
data.hist(
    column=['Tablet Hardness (N)', 'Tablet Height (mm)', 'Tablet Friability (%)'],
    figsize=(12, 8),
    color='g',
)

In [None]:
# Model inputs: the three process settings.
X = data.loc[
    :,
    ['Press Force (kg)', 'Motor Speed (tablets/min)', 'Particle Size (μm)'],
]

# Model outputs: the three measured tablet properties (this column order is
# the order of the columns in the model's predictions).
y = data.loc[
    :,
    ['Tablet Hardness (N)', 'Tablet Height (mm)', 'Tablet Friability (%)'],
]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features. The scaling statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Fit one 100-tree random forest per target column via the multi-output wrapper.
model = MultiOutputRegressor(
    estimator=RandomForestRegressor(random_state=42, n_estimators=100)
)
model.fit(X_train_scaled, y_train)

In [None]:
# Predict all three targets for the held-out rows.
y_pred = model.predict(X_test_scaled)

# Aggregate scores: with the default settings both metrics are a uniform
# average over the three target columns.
rmse = root_mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('RMSE:', rmse)
print('R2:', r2)

# The targets are in different units (N, mm, %), so the averaged RMSE is
# dominated by the largest-scale target. Report each target separately as well.
rmse_per_target = root_mean_squared_error(y_test, y_pred, multioutput='raw_values')
r2_per_target = r2_score(y_test, y_pred, multioutput='raw_values')

for target, target_rmse, target_r2 in zip(y_test.columns, rmse_per_target, r2_per_target):
    print(f'{target}: RMSE = {target_rmse:.4f}, R2 = {target_r2:.4f}')

In [None]:
# Actual vs. predicted scatter plot for Tablet Hardness (N)
sample = 'Tablet Hardness (N)'
test_sample = y_test[sample]

fig, ax = plt.subplots(figsize=(10, 6))
# Column 0 of y_pred holds the hardness predictions (first target column of y).
ax.scatter(test_sample, y_pred[:, 0])

ax.set_xlabel(f'Actual {sample}')
ax.set_ylabel(f'Predicted {sample}')
ax.set_title(f'Actual vs Predicted {sample}')

# Same hand-picked tick positions on both axes: 52, 54, ..., 100
xy = np.arange(52, 102, 2)
ax.set_xticks(xy)
ax.set_yticks(xy)
ax.grid()

In [None]:
# Actual vs. predicted scatter plot for Tablet Height (mm)
sample = 'Tablet Height (mm)'
test_sample = y_test[sample]
plt.figure(figsize=(10,6))
# Column 1 of y_pred holds the height predictions (second target column of y).
plt.scatter(test_sample, y_pred[:, 1])

plt.xlabel(f'Actual {sample}')
plt.ylabel(f'Predicted {sample}')
plt.title(f'Actual vs Predicted {sample}')
# No legend: the scatter has no label, so plt.legend() would only emit a
# "No artists with labels found" warning and draw nothing.

# Making custom grid for the plot: identical tick positions on both axes,
# starting at 4.4 in steps of 0.1 (stop value 6.2 is excluded).
xy = np.arange(4.4, 6.2, 0.1)
plt.xticks(xy)
plt.yticks(xy)
plt.grid()

In [None]:
# Actual vs. predicted scatter plot for Tablet Friability (%)
sample = 'Tablet Friability (%)'
test_sample = y_test[sample]
# Start a fresh figure with the same size as the other two target plots, so
# this plot never draws onto a figure left over from a previous cell.
plt.figure(figsize=(10,6))
# Column 2 of y_pred holds the friability predictions (third target column of y).
plt.scatter(test_sample, y_pred[:, 2])

plt.xlabel(f'Actual {sample}')
plt.ylabel(f'Predicted {sample}')
plt.title(f'Actual vs Predicted {sample}')

# Making custom grid for the plot: identical tick positions on both axes,
# starting at 0.1 in steps of 0.05 (stop value 0.5 is excluded).
xy = np.arange(0.1, 0.5, 0.05)
plt.xticks(xy)
plt.yticks(xy)
plt.grid()