In [None]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Read the raw aluminum dataset from the Colab filesystem into a DataFrame
file_path = '/content/aluminum_data_set_final.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Extract numerical values from the Chemical Composition column
def extract_composition(comp_str):
    """Parse a composition string into a mapping of element -> percentage.

    The expected shape is comma-separated ``element: value%`` pairs, e.g.
    ``"Al: 96.0%, Cu: 1.0%, Si: 0.8%"``. Whitespace around the commas, the
    colon and the percent sign is ignored, and empty fragments (such as the
    one produced by a trailing comma) are skipped.

    Args:
        comp_str: The raw cell value. Anything that is not a string (for
            example the NaN float pandas uses for an empty CSV cell) is
            treated as "no composition available".

    Returns:
        dict: Element name (str) -> percentage (float). Empty when
        ``comp_str`` is not a string.

    Raises:
        ValueError: If a fragment has no ``:`` separator or its value is not
            a valid number.
    """
    comp_dict = {}
    # Missing cells are not text, so calling .split on them would fail.
    if not isinstance(comp_str, str):
        return comp_dict
    for comp in comp_str.split(','):
        comp = comp.strip()
        if not comp:
            continue
        # Split on the first colon only, regardless of surrounding spaces.
        element, percentage = comp.split(':', 1)
        comp_dict[element.strip()] = float(percentage.strip().strip('%'))
    return comp_dict

# Expand the Chemical Composition strings into one numeric column per element
composition_df = df['Chemical Composition'].apply(extract_composition).apply(pd.Series)

# Replace the raw composition text with the extracted per-element columns
df_processed = pd.concat([df.drop(columns=['Chemical Composition']), composition_df], axis=1)

# Binary target: 1 for 'Good', 0 for every other label (vectorized comparison
# instead of a per-row lambda; the result is identical)
df_processed['Quality'] = df_processed['Quality'].eq('Good').astype(int)

# Define features (X) and target (y)
X = df_processed.drop(columns=['Quality'])
y = df_processed['Quality']

# Split the data into training and testing sets. The classes are imbalanced
# (the recorded test split was 73 vs 27), so stratify to keep the same class
# ratio in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the numerical features; the scaler is fitted on the training
# split only so no test-set statistics leak into training
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a logistic regression model. In the recorded run the unweighted model
# never predicted class 1 correctly (recall 0.00), so weight the classes
# inversely to their frequency.
model = LogisticRegression(random_state=42, class_weight='balanced')
model.fit(X_train_scaled, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

# Persist the model together with the scaler it was trained with
joblib.dump(model, 'aluminum_quality_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Example prediction for new data input
new_data = {
    "Casting Temperature (°C)": 750,
    "Cooling Water Temperature (°C)": 28,
    "Casting Speed (m/min)": 1.5,
    "Cast Bar Entry Temperature at Rolling Mill (°C)": 400,
    "Emulsion Temperature (°C)": 60,
    "Pressure (MPa)": 0.8,
    "Concentration (%)": 5.5,
    "Rod Quench Water Pressure (MPa)": 1.2,
    "Al": 96.0,
    "Cu": 1.0,
    "Si": 0.8
}

# Convert the single input to a DataFrame and put its columns in the exact
# order used for training. Selecting by X.columns (rather than trusting the
# dict's insertion order) also raises a KeyError naming any feature that is
# missing from new_data.
new_df = pd.DataFrame([new_data])[list(X.columns)]

# Scale the new input with the scaler fitted on the training split
new_df_scaled = scaler.transform(new_df)

# Make a prediction using the trained model
prediction = model.predict(new_df_scaled)[0]
prediction_label = "Good" if prediction == 1 else "Not Good"

print(f"Predicted Aluminum Quality: {prediction_label}")

Accuracy: 0.72
Confusion Matrix:
[[72  1]
 [27  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.99      0.84        73
           1       0.00      0.00      0.00        27

    accuracy                           0.72       100
   macro avg       0.36      0.49      0.42       100
weighted avg       0.53      0.72      0.61       100

Predicted Aluminum Quality: Not Good


In [None]:
# Report the number of missing values in every column (features and target alike)
print(df_processed.isnull().sum())

Quality                                            0
Casting Temperature (°C)                           0
Cooling Water Temperature (°C)                     0
Casting Speed (m/min)                              0
Cast Bar Entry Temperature at Rolling Mill (°C)    0
Emulsion Temperature (°C)                          0
Pressure (MPa)                                     0
Concentration (%)                                  0
Rod Quench Water Pressure (MPa)                    0
Al                                                 0
Cu                                                 0
Si                                                 0
dtype: int64


In [None]:
# Discard every row holding at least one missing value, then rebuild the
# target (y) and the feature matrix (X) from the cleaned frame
df_processed = df_processed.dropna(axis=0, how='any')
y = df_processed['Quality']
X = df_processed.drop('Quality', axis=1)

In [None]:
# Split the data into training and testing sets (80/20, fixed seed).
# stratify=y keeps the class proportions of the target the same in both
# splits, which matters because the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Print the missing-value count of the training target, then of the test target
print(*(pd.Series(target_part).isna().sum() for target_part in (y_train, y_test)), sep="\n")

0
0


In [None]:
# Standardize the numerical features; fit on the training split only so the
# test split is transformed with training statistics
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the logistic regression model.
# - multi_class='ovr' is dropped: the parameter is deprecated in recent
#   scikit-learn releases, and for the binary 0/1 target built earlier in this
#   notebook the default already fits the same single one-vs-rest problem.
# - class_weight='balanced' counters the class imbalance that made the
#   unweighted model ignore the minority class in the first recorded run.
model = LogisticRegression(random_state=42, class_weight='balanced')
model.fit(X_train_scaled, y_train)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Read the raw aluminum dataset from the Colab filesystem into a DataFrame
file_path = '/content/aluminum_data_set_final.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Extract numerical values from the Chemical Composition column
def extract_composition(comp_str):
    """Parse a composition string into a mapping of element -> percentage.

    The expected shape is comma-separated ``element: value%`` pairs, e.g.
    ``"Al: 96.0%, Cu: 1.0%, Si: 0.8%"``. Whitespace around the commas, the
    colon and the percent sign is ignored, and empty fragments (such as the
    one produced by a trailing comma) are skipped.

    Args:
        comp_str: The raw cell value. Anything that is not a string (for
            example the NaN float pandas uses for an empty CSV cell) is
            treated as "no composition available".

    Returns:
        dict: Element name (str) -> percentage (float). Empty when
        ``comp_str`` is not a string.

    Raises:
        ValueError: If a fragment has no ``:`` separator or its value is not
            a valid number.
    """
    comp_dict = {}
    # Missing cells are not text, so calling .split on them would fail.
    if not isinstance(comp_str, str):
        return comp_dict
    for comp in comp_str.split(','):
        comp = comp.strip()
        if not comp:
            continue
        # Split on the first colon only, regardless of surrounding spaces.
        element, percentage = comp.split(':', 1)
        comp_dict[element.strip()] = float(percentage.strip().strip('%'))
    return comp_dict

# Expand the Chemical Composition strings into one numeric column per element
composition_df = df['Chemical Composition'].apply(extract_composition).apply(pd.Series)

# Replace the raw composition text with the extracted per-element columns
df_processed = pd.concat([df.drop(columns=['Chemical Composition']), composition_df], axis=1)

# Map 'Quality' column to three classes (Good: 2, Moderate: 1, Not Good: 0).
# Series.map turns every label that is absent from the mapping into NaN, and
# the dropna() below would then silently discard those rows. In the recorded
# run that removed an entire class (only labels 1 and 2 reached the model), so
# fail loudly on unknown labels instead. Rows whose Quality is genuinely
# missing in the CSV are still left for dropna() to remove.
quality_codes = {'Good': 2, 'Moderate': 1, 'Not Good': 0}
raw_quality = df_processed['Quality']
encoded_quality = raw_quality.map(quality_codes)
unmapped_quality = raw_quality[encoded_quality.isna() & raw_quality.notna()]
if not unmapped_quality.empty:
    raise ValueError(
        "Unrecognized 'Quality' labels; add them to quality_codes before training: "
        f"{unmapped_quality.value_counts().to_dict()}"
    )
df_processed['Quality'] = encoded_quality

# Remove rows with NaN values, then store the class codes as integers
# (the column is float only because NaN forced that dtype)
df_processed = df_processed.dropna().astype({'Quality': int})

# Define features (X) and target (y)
X = df_processed.drop(columns=['Quality'])
y = df_processed['Quality']

# Split the data into training and testing sets; stratify so every class
# keeps the same share in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the numerical features (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a one-vs-rest logistic regression model. LogisticRegression's
# multi_class='ovr' argument is deprecated in recent scikit-learn releases;
# wrapping the estimator in OneVsRestClassifier is the documented replacement.
# Imported here so this cell stays runnable on its own, like its other imports.
from sklearn.multiclass import OneVsRestClassifier

model = OneVsRestClassifier(LogisticRegression(random_state=42))
model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

# Labels that actually occur in the truth or the predictions, in sorted order;
# passed explicitly below so matrix rows and report rows line up with the names
labels = sorted(set(y_test) | set(y_pred))
conf_matrix = confusion_matrix(y_test, y_pred, labels=labels)

# Check unique labels in y_test and y_pred
print("Unique labels in y_test:", y_test.unique())
print("Unique labels in y_pred:", set(y_pred))

# Define target names based on unique labels
label_names = {0: 'Not Good', 1: 'Moderate', 2: 'Good'}
target_names = [label_names[label] for label in labels]

# Generate the classification report
class_report = classification_report(
    y_test, y_pred, labels=labels, target_names=target_names
)

print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

# Save the model and the scaler
joblib.dump(model, 'aluminum_quality_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Now, let's use the trained model to make predictions based on manual input

# Load the saved model and scaler
loaded_model = joblib.load('aluminum_quality_model.pkl')
loaded_scaler = joblib.load('scaler.pkl')


def _read_float(prompt):
    """Prompt repeatedly until the entered text parses as a float; return it.

    A typo therefore re-asks the same question instead of aborting the whole
    cell with a ValueError. Only ValueError is handled, so any other error
    raised by input() still propagates.
    """
    while True:
        raw_value = input(prompt)
        try:
            return float(raw_value)
        except ValueError:
            print(f"'{raw_value}' is not a valid number, please try again.")


# Manually input values
al = _read_float("Enter Aluminum Content (%): ")
cu = _read_float("Enter Copper Content (%): ")
si = _read_float("Enter Silicon Content (%): ")
casting_temp = _read_float("Enter Casting Temperature (°C): ")
cooling_water_temp = _read_float("Enter Cooling Water Temperature (°C): ")
casting_speed = _read_float("Enter Casting Speed (m/min): ")
bar_entry_temp = _read_float("Enter Cast Bar Entry Temperature at Rolling Mill (°C): ")
emulsion_temp = _read_float("Enter Emulsion Temperature (°C): ")
pressure = _read_float("Enter Pressure (MPa): ")
concentration = _read_float("Enter Concentration (%): ")
rod_quench_pressure = _read_float("Enter Rod Quench Water Pressure (MPa): ")

# Collect the inputs under the training column names. The key order here does
# not have to match the training order; columns are aligned to X.columns below.
input_data = {
    "Al": al,
    "Cu": cu,
    "Si": si,
    "Casting Temperature (°C)": casting_temp,
    "Cooling Water Temperature (°C)": cooling_water_temp,
    "Casting Speed (m/min)": casting_speed,
    "Cast Bar Entry Temperature at Rolling Mill (°C)": bar_entry_temp,
    "Emulsion Temperature (°C)": emulsion_temp,
    "Pressure (MPa)": pressure,
    "Concentration (%)": concentration,
    "Rod Quench Water Pressure (MPa)": rod_quench_pressure,
}

# Convert the input data to a DataFrame with the training column order.
# Selecting by X.columns raises a KeyError naming any training feature that is
# missing from input_data, instead of silently filling it with NaN.
input_df = pd.DataFrame([input_data])[list(X.columns)]

# Scale the input data using the loaded scaler
input_scaled = loaded_scaler.transform(input_df)

# Make a prediction using the loaded model
prediction = loaded_model.predict(input_scaled)[0]
prediction_label = {2: "Good", 1: "Moderate", 0: "Not Good"}[prediction]

print(f"Predicted Aluminum Quality: {prediction_label}")


Unique labels in y_test: [2. 1.]
Unique labels in y_pred: {1.0, 2.0}
Accuracy: 0.47761194029850745
Confusion Matrix:
[[11 28]
 [ 7 21]]
Classification Report:
              precision    recall  f1-score   support

    Moderate       0.61      0.28      0.39        39
        Good       0.43      0.75      0.55        28

    accuracy                           0.48        67
   macro avg       0.52      0.52      0.47        67
weighted avg       0.53      0.48      0.45        67

Enter Aluminum Content (%): 54
Enter Copper Content (%): 115
Enter Silicon Content (%): 121
Enter Casting Temperature (°C): 788
Enter Cooling Water Temperature (°C): 145
Enter Casting Speed (m/min): 111
Enter Cast Bar Entry Temperature at Rolling Mill (°C): 4444
Enter Emulsion Temperature (°C): 555
Enter Pressure (MPa): 11
Enter Concentration (%): 11
Enter Rod Quench Water Pressure (MPa): 11
Predicted Aluminum Quality: Good


In [None]:
import joblib
from sklearn.ensemble import RandomForestClassifier  # Example model

# Train a random forest on the *scaled* training features. The prediction
# cell later in this notebook passes its input through the saved scaler before
# calling predict(), so the model has to be fitted on data in that same scaled
# space; fitting on the raw X_train would make those predictions meaningless.
# random_state is fixed so the forest is reproducible between runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Save the trained model
joblib.dump(model, 'model.pkl')

['model.pkl']

In [None]:
# NOTE(review): in the recorded run this command fails with "No such file or
# directory" for /content/train_model.py, and no visible cell creates that
# script. TODO confirm whether the script should be added or this cell removed.
!python train_model.py

python3: can't open file '/content/train_model.py': [Errno 2] No such file or directory


In [None]:
import joblib
from sklearn.ensemble import RandomForestClassifier  # Example model

# Train a random forest on the *scaled* training features. The prediction
# cell that follows passes its input through the saved scaler before calling
# predict(), so the model has to be fitted on data in that same scaled space;
# fitting on the raw X_train would make those predictions meaningless.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Save the trained model
joblib.dump(model, 'model.pkl')

# Save the scaler that produced X_train_scaled, so inference can apply the
# identical transformation
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [None]:
import joblib
import pandas as pd

# Restore the persisted scaler and classifier from disk
loaded_scaler = joblib.load('scaler.pkl')
loaded_model = joblib.load('model.pkl')

# Gather the feature values under their training column names. Nothing in this
# cell assigns al, cu, si, ...; they must already exist in the kernel (the
# interactive-input cell earlier in the notebook defines them).
input_data = {
    "Al": al,
    "Cu": cu,
    "Si": si,
    "Casting Temperature (°C)": casting_temp,
    "Cooling Water Temperature (°C)": cooling_water_temp,
    "Casting Speed (m/min)": casting_speed,
    "Cast Bar Entry Temperature at Rolling Mill (°C)": bar_entry_temp,
    "Emulsion Temperature (°C)": emulsion_temp,
    "Pressure (MPa)": pressure,
    "Concentration (%)": concentration,
    "Rod Quench Water Pressure (MPa)": rod_quench_pressure,
}

# Build a one-row frame and arrange its columns in the order of X
input_df = pd.DataFrame([input_data]).reindex(columns=X.columns)

# Apply the loaded scaler to the single row
input_scaled = loaded_scaler.transform(input_df)

# Predict the class code for the row and translate it into its text label
prediction = loaded_model.predict(input_scaled)[0]
prediction_label = dict(zip((2, 1, 0), ("Good", "Moderate", "Not Good")))[prediction]

print(f"Predicted Aluminum Quality: {prediction_label}")


Predicted Aluminum Quality: Not Good


