In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')  # Suppresses ALL warnings, not only deprecation warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv("defects_data.csv")

# Display basic information about the dataset.
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the report.
df.info()

# Check for missing values (per-column count of null entries)
missing_values = df.isnull().sum()
if missing_values.any():
    print("Missing values detected in the dataset:")
    print(missing_values)

    # Fill missing values with the mean of each numeric column.
    # numeric_only=True is required: without it, pandas >= 2.0 raises a
    # TypeError as soon as the frame contains a non-numeric column. Columns
    # without a numeric mean are left untouched by fillna.
    # NOTE: the means are computed over the full frame (target column
    # included) before the train/test split, so test rows influence the
    # imputed values.
    df.fillna(df.mean(numeric_only=True), inplace=True)
    print("Missing values have been filled with column means.")
else:
    print("No missing values detected in the dataset.")

# Define features (X) and target (y) assuming 'target' is the column name for labels
X = df.drop(columns=['target'])  # Replace 'target' with the actual target column name
y = df['target']  # Replace 'target' with the actual target column name

# Split the dataset into training and testing sets (80% / 20%, fixed seed
# so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature set: the scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
