# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Raw experiment log: every column holds one entry per sample (22 samples).
# The last three samples have no recorded yield (None) and are dropped below.
# NOTE(review): a few "Amps 45" / "Amps 60" entries are 0 -- if those are
# missing readings rather than a true zero current they pull Avg_Amps down;
# confirm against the lab notes.
data = {
    "Sample Id": ["060724A", "060724B", "061024A", "061024B", "061124A", "061124B", "061224A", "061224B", "061224C", "061324A", "061324B", "061724A", "061724B", "061724C", "061724D", "061824A", "061824B", "061824C", "061824D", "061924A", "061924B", "061924C"],
    "S04 (ml)": [20] * 22,
    "H2O (ml)": [120] * 22,
    "Cathode Weight (g)": [4.701, 5.864, 1.224, 3.116, 5.513, 4.761, 1.784, 2.984, 3.956, 2.254, 2.1, 1.053, 3.388, 4.22, 5.17, 3.476, 2.482, 4.667, 5.89, 1.434, 2.413, 3.527],
    "Volts": [5] * 22,
    "Amps 0": [2.17, 1.67, 0.63, 1.4, 1, 1.55, 0.81, 0.7, 0.33, 2.2, 0.92, 1.29, 0.58, 0.78, 1.63, 0.8, 0.78, 1.2, 1.5, 1.03, 1.94, 0.48],
    "Amps 15": [2.15, 1.73, 0.57, 0.8, 1.1, 1.71, 0.83, 0.87, 0.51, 1.43, 1.14, 0.72, 0.35, 0.8, 1.59, 0.81, 0.86, 1.37, 1.73, 0.6, 1.16, 0.15],
    "Amps 30": [2.3, 2.1, 0.58, 0.55, 1.18, 0.81, 0.57, 0.85, 0.65, 1, 1.23, 0.59, 0.42, 0.88, 1.5, 0.91, 0.9, 1.4, 1.71, 0.47, 1.09, 0.14],
    "Amps 45": [0, 2.76, 0.38, 0.94, 1.21, 1.27, 0.86, 0.87, 0.82, 0.74, 0.72, 0.66, 0.47, 0.8, 1.01, 1.21, 0.86, 1.31, 0.82, 0.33, 1.13, 0.14],
    "Amps 60": [0, 2.1, 0.55, 1.37, 0.88, 1.58, 0.93, 1.1, 1.06, 1.05, 0.69, 0.44, 0.73, 0, 1.04, 1.55, 0.59, 1.21, 0.61, 0.64, 1.05, 1.21],
    "Copper Sulfate Yield": [5.019, 7.161, 1.984, 3.941, 5.402, 3.911, 3.25, 4.649, 2.986, 4.559, 3.181, 1.613, 2.569, 1.045, 3.882, 5.327, 4.1, 3.433, 4.326, None, None, None]
}

# The five current readings that are averaged into a single feature.
amp_columns = ['Amps 0', 'Amps 15', 'Amps 30', 'Amps 45', 'Amps 60']

# Convert to DataFrame
df = pd.DataFrame(data)

# Keep only the rows that have a measured yield (the regression target).
# .copy() makes the filtered frame independent of the original, so adding a
# column below cannot trigger pandas' SettingWithCopyWarning.
df = df.dropna(subset=['Copper Sulfate Yield']).copy()

# Feature engineering: row-wise mean of the five current readings
df['Avg_Amps'] = df[amp_columns].mean(axis=1)

# Select features and target
X = df[['Cathode Weight (g)', 'Avg_Amps']]
y = df['Copper Sulfate Yield']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same transform to both the training and the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3-nearest-neighbour regressor using scikit-learn's 'distance' weighting,
# fitted on the scaled training features.
knn = KNeighborsRegressor(n_neighbors=3, weights='distance').fit(
    X_train_scaled, y_train
)

# Evaluate on the held-out rows
y_pred = knn.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R-squared Score: {r2:.4f}")

# Evaluate the fitted model on a regular grid spanning both features (padded
# slightly beyond the observed range) so the prediction surface can be drawn.
weight_col, amps_col = 'Cathode Weight (g)', 'Avg_Amps'
x_min, x_max = X[weight_col].min() - 1, X[weight_col].max() + 1
y_min, y_max = X[amps_col].min() - 0.5, X[amps_col].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# The scaler was fitted on a DataFrame, so give the grid the same column
# names; passing a bare ndarray makes scikit-learn warn about missing
# feature names.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=[weight_col, amps_col])
Z = knn.predict(scaler.transform(grid)).reshape(xx.shape)

# Use one colour scale for the surface and the observed points so the single
# colourbar is valid for both layers.
vmin = min(Z.min(), y.min())
vmax = max(Z.max(), y.max())

# Plot the predicted-yield surface with the measured samples on top
plt.figure(figsize=(12, 8))
plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdYlBu, vmin=vmin, vmax=vmax)
scatter = plt.scatter(X[weight_col], X[amps_col], c=y, cmap=plt.cm.RdYlBu,
                      edgecolor='black', vmin=vmin, vmax=vmax)
plt.colorbar(scatter, label='Copper Sulfate Yield')
# Axes show the two input features; the yield is encoded by colour.
plt.xlabel('Cathode Weight (g)')
plt.ylabel('Average Amps')
plt.title('KNN Regression: Predicted Copper Sulfate Yield')
plt.tight_layout()
plt.show()


# Bar chart of the model's prediction for every sample with a measured yield.
# NOTE(review): X contains the rows the model was trained on as well as the
# held-out rows, so these bars are not all out-of-sample predictions.
predicted_yield = knn.predict(scaler.transform(X))

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(df['Sample Id'], predicted_yield)
ax.set_xlabel('Sample Name')
ax.set_ylabel('Predicted Yield')
ax.set_title('Predicted Copper Sulfate Yield by Sample')
# Tilt the sample ids so neighbouring labels do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()
