<a href="https://colab.research.google.com/github/DwijBishnoi/Data-Driven-Drilling-Optimization/blob/main/Multi_disciplinary_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# Stage 1: read the spreadsheet and turn it into a numeric feature/target set.
# The sheet carries a banner row above the real header row, so the header
# has to be promoted by hand before any numeric conversion.
# ---------------------------------------------------------------------------
file_path = '/content/sample_data/Dataset.xlsx'
df = pd.read_excel(file_path)

# Show the raw sheet as loaded, before any cleaning.
print("Initial DataFrame Preview:")
print(df.head(10))
print("Columns in the DataFrame:")
print(df.columns)

# Row index 1 holds the real column names; promote it to the header and
# keep only the rows below it, renumbered from zero.
header_row = df.iloc[1]
df.columns = header_row
df = df.iloc[2:].reset_index(drop=True)

# Columns whose promoted header is NaN carry no name; discard them.
named_column_mask = df.columns.notna()
df = df.loc[:, named_column_mask]

print("Cleaned DataFrame Columns:")
print(df.columns)

# Canonical column names, in the order the columns appear in the sheet.
expected_columns = ['Spindle Speed', 'Feed Rate', 'Drill Diameter',
                    'Delamination Factor', 'Cylindricity', 'Circularity', 'MRR']

# Only relabel when the column count lines up; otherwise just report it.
if len(df.columns) != len(expected_columns):
    print(f"Warning: Expected {len(expected_columns)} columns, but got {len(df.columns)}. Current columns: {df.columns.tolist()}")
else:
    df.columns = expected_columns

# Coerce every column to numeric (unparseable cells become NaN), then
# discard any row that still has a missing value.
df = df.apply(pd.to_numeric, errors='coerce').dropna()

print("Shape of DataFrame after cleaning:", df.shape)

# Fail early if cleaning removed every row.
if df.empty:
    raise ValueError("The DataFrame is empty after cleaning. Please check the data.")

# Features are every expected column except the last one ('MRR'), which is
# the regression target.
feature_columns = expected_columns[:-1]
X = df[feature_columns]
y = df['MRR']

# Guard against an empty feature matrix or target vector.
if any(part.empty for part in (X, y)):
    raise ValueError("X or y is empty. Please check the data after splitting.")

# ---------------------------------------------------------------------------
# Stage 2: fit a Random Forest baseline plus a set of comparison regressors
# on the same train/test split, then report MSE and R^2.
# ---------------------------------------------------------------------------

# Comparison regressors, keyed by the display name used in the results table.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42),
    "KNN": KNeighborsRegressor(),
    "XGBoost": XGBRegressor(random_state=42)
}

# Split into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Train every comparison regressor. The loop variable is deliberately NOT
# called `model`: reusing that name would rebind it to the last entry of
# `models`, and the metrics below would then describe that estimator
# instead of the Random Forest.
for name, candidate in models.items():
    print(f"Training {name}...")
    candidate.fit(X_train, y_train)

# Make predictions with the Random Forest
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Calculate mean squared error and R^2 score for the Random Forest
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

# Print the results
print(f"Training Mean Squared Error: {train_mse}")
print(f"Testing Mean Squared Error: {test_mse}")
print(f"Training R^2 Score: {r2_train}")
print(f"Testing R^2 Score: {r2_test}")

# Score each comparison regressor on the held-out test set and collect one
# [name, MSE, R^2] row per model.
results = []
for name, candidate in models.items():
    y_pred = candidate.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results.append([name, mse, r2])

results_df = pd.DataFrame(results, columns=["Model", "MSE", "R^2"])
print(results_df)

Initial DataFrame Preview:
   Unnamed: 0 Input Parameters Unnamed: 2      Unnamed: 3  \
0         NaN              NaN        NaN             NaN   
1         NaN    Spindle Speed  Feed Rate  Drill Diameter   
2         NaN             4500        0.1               4   
3         NaN             4500        0.1               6   
4         NaN             4500        0.1               8   
5         NaN             4500        0.2               4   
6         NaN             4500        0.2               6   
7         NaN             4500        0.2               8   
8         NaN             4500        0.3               4   
9         NaN             4500        0.3               6   

     Output parameters    Unnamed: 5   Unnamed: 6    Unnamed: 7  
0         Minimization           NaN          NaN  Maximization  
1  Delamination Factor  Cylindricity  Circularity           MRR  
2                1.043         0.032        0.036       0.05325  
3             1.041772         0.037 