In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Sample dataset as a dictionary (you can replace this with reading from a CSV).
# One entry per electrolysis run; the last three runs have no recorded yield yet.
sample_ids = ["060724A", "060724B", "061024A", "061024B", "061124A", "061124B", "061224A", "061224B", "061224C", "061324A", "061324B", "061724A", "061724B", "061724C", "061724D", "061824A", "061824B", "061824C", "061824D", "061924A", "061924B", "061924C"]

# The constant columns are sized from the id list instead of a literal count,
# so adding or removing a sample cannot leave them a different length.
n_samples = len(sample_ids)

data = {
    "Sample Id": sample_ids,
    # NOTE(review): the key is spelled with a zero ("S04"); presumably sulfate
    # (SO4) was meant - confirm before renaming, since it is a column name.
    "S04 (ml)": [20] * n_samples,
    "H2O (ml)": [120] * n_samples,
    "Cathode Weight (g)": [4.701, 5.864, 1.224, 3.116, 5.513, 4.761, 1.784, 2.984, 3.956, 2.254, 2.1, 1.053, 3.388, 4.22, 5.17, 3.476, 2.482, 4.667, 5.89, 1.434, 2.413, 3.527],
    # Identical for every sample in this dataset, so it adds no information
    # to the model; it is still kept as a feature below.
    "Volts": [5] * n_samples,
    "Amps 0": [2.17, 1.67, 0.63, 1.4, 1, 1.55, 0.81, 0.7, 0.33, 2.2, 0.92, 1.29, 0.58, 0.78, 1.63, 0.8, 0.78, 1.2, 1.5, 1.03, 1.94, 0.48],
    "Amps 15": [2.15, 1.73, 0.57, 0.8, 1.1, 1.71, 0.83, 0.87, 0.51, 1.43, 1.14, 0.72, 0.35, 0.8, 1.59, 0.81, 0.86, 1.37, 1.73, 0.6, 1.16, 0.15],
    "Amps 30": [2.3, 2.1, 0.58, 0.55, 1.18, 0.81, 0.57, 0.85, 0.65, 1, 1.23, 0.59, 0.42, 0.88, 1.5, 0.91, 0.9, 1.4, 1.71, 0.47, 1.09, 0.14],
    "Amps 45": [0, 2.76, 0.38, 0.94, 1.21, 1.27, 0.86, 0.87, 0.82, 0.74, 0.72, 0.66, 0.47, 0.8, 1.01, 1.21, 0.86, 1.31, 0.82, 0.33, 1.13, 0.14],
    "Amps 60": [0, 2.1, 0.55, 1.37, 0.88, 1.58, 0.93, 1.1, 1.06, 1.05, 0.69, 0.44, 0.73, 0, 1.04, 1.55, 0.59, 1.21, 0.61, 0.64, 1.05, 1.21],
    "Copper Sulfate Yield": [5.019, 7.161, 1.984, 3.941, 5.402, 3.911, 3.25, 4.649, 2.986, 4.559, 3.181, 1.613, 2.569, 1.045, 3.882, 5.327, 4.1, 3.433, 4.326, None, None, None]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Remove rows with missing target values (the model cannot be fitted or scored on them)
df = df.dropna(subset=["Copper Sulfate Yield"])

# Features and target variable
X = df[["Cathode Weight (g)", "Volts", "Amps 0", "Amps 15", "Amps 30", "Amps 45", "Amps 60"]]
y = df["Copper Sulfate Yield"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the RBF-kernel support vector regressor and fit it on the training rows
# (fit returns the estimator itself, so construction and training can be chained).
# NOTE(review): the features reach SVR unscaled; scikit-learn's SVM guide
# recommends standardizing inputs - consider a StandardScaler pipeline.
model = SVR(kernel='rbf', C=100, gamma='auto').fit(X_train, y_train)

# Score the held-out rows: predict, then report the mean squared error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Visualize relationships: one scatter plot of the yield against each feature.
# The grid keeps its 3x3 layout for up to nine features and gains extra rows
# beyond that, instead of failing on the tenth subplot.
n_cols = 3
n_rows = max(3, -(-X.shape[1] // n_cols))  # ceiling division, never fewer than 3 rows
plt.figure(figsize=(12, 10))
for position, column in enumerate(X.columns, start=1):  # subplot slots are 1-based
    plt.subplot(n_rows, n_cols, position)
    plt.scatter(X[column], y, marker='o', s=50, alpha=0.8)
    plt.xlabel(column)
    plt.ylabel("Copper Sulfate Yield")
plt.tight_layout()
plt.show()

# Function to predict yield
def predict_yield(model, cathode_weight, volts, amps_0, amps_15, amps_30, amps_45, amps_60):
    """Predict the copper sulfate yield for a single sample.

    The seven measurements are passed to ``model.predict`` as one row, in the
    order: cathode weight, volts, then the current readings at 0, 15, 30, 45
    and 60.

    Args:
        model: Fitted regressor exposing ``predict``. If it also exposes
            ``feature_names_in_`` (as scikit-learn estimators fitted on a
            DataFrame do), the row is labelled with those names.
        cathode_weight: Value for the "Cathode Weight (g)" feature.
        volts: Value for the "Volts" feature.
        amps_0: Value for the "Amps 0" feature.
        amps_15: Value for the "Amps 15" feature.
        amps_30: Value for the "Amps 30" feature.
        amps_45: Value for the "Amps 45" feature.
        amps_60: Value for the "Amps 60" feature.

    Returns:
        The first (and only) element of the model's prediction for the row.
    """
    row = [[cathode_weight, volts, amps_0, amps_15, amps_30, amps_45, amps_60]]

    # A model fitted on a DataFrame remembers its column names; handing it a
    # bare list makes scikit-learn warn that X has no valid feature names.
    # Labelling the row with the stored names keeps prediction consistent
    # with training. Models without stored names still get the plain list.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(row[0]):
        row = pd.DataFrame(row, columns=list(feature_names))

    predicted_yield = model.predict(row)
    return predicted_yield[0]

# Example prediction; the inputs repeat the measurements listed for sample 060724B
example_yield = predict_yield(
    model,
    cathode_weight=5.864,
    volts=5,
    amps_0=1.67,
    amps_15=1.73,
    amps_30=2.1,
    amps_45=2.76,
    amps_60=2.1,
)
print("Predicted Yield for example:", example_yield)