### 1. Data Acquisition and Cleaning

In [None]:
# Option 1: Load your own dataset
import pandas as pd

# Single place to point the notebook at your data; the value is passed
# straight to pandas.read_csv.
DATA_PATH = "path_to_your_dataset.csv"  # Change the path here
df = pd.read_csv(DATA_PATH)

# Option 2: Load dataset from seaborn or sklearn
# import seaborn as sns
# df = sns.load_dataset('titanic')
# NOTE: load_boston was removed in scikit-learn 1.2, so the bundled
# load_diabetes regression dataset is used here instead.
# from sklearn.datasets import load_diabetes
# data = load_diabetes()
# df = pd.DataFrame(data.data, columns=data.feature_names)
# df['target'] = data.target

# List the column names so the target column can be chosen in the
# model-building section.
print("Columns in the dataset:")
print(df.columns.tolist())
df.head()

In [None]:
# Handling missing values, encoding and scaling
# NOTE: run this cell once per freshly loaded df. On a second run the encoded
# columns are already integers and would be treated as numeric.
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Columns listed here are left unscaled. Add your target column name
# (e.g. ['target']) if it is numeric and you want labels / error metrics to
# stay in their original units.
skip_scaling = []

# Basic missing value handling. .copy() gives an independent frame so the
# column assignments below do not trigger SettingWithCopyWarning.
df = df.dropna().copy()

# Record the numeric columns BEFORE label encoding. Selecting them afterwards
# would also pick up the integer category codes and standardize them, which
# turns an encoded class label into non-integer floats.
numeric_cols = [
    col for col in df.select_dtypes(include=['int64', 'float64']).columns
    if col not in skip_scaling
]

# Replace every object (string) column with integer codes; the fitted encoder
# is kept per column so the codes can be mapped back later.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Standardize the numeric columns recorded above. The guard avoids fitting
# the scaler on an empty column selection.
scaler = StandardScaler()
if numeric_cols:
    df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

df.head()

### 2. Exploratory Data Analysis (EDA)

In [None]:
# Plotting libraries used by the visualization cell below.
import matplotlib.pyplot as plt
import seaborn as sns

# Summary statistics of df as the cell's displayed output. Columns that were
# standardized by the preprocessing cell will show mean ~ 0 and std ~ 1 here
# rather than their original scale.
df.describe()


In [None]:
# Basic visualizations (without hue)
# Draw one histogram with a KDE overlay per float64/int64 column, each on its
# own figure and titled with the column name.
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
for column_name in numeric_columns:
    fig, ax = plt.subplots()
    sns.histplot(df[column_name], kde=True, ax=ax)
    ax.set_title(column_name)
    plt.show()

### 3. Model Building and Training

In [None]:
# Choose target column dynamically
target = 'target_column_name'  # <-- change this

# Fail with an actionable message if the placeholder above was not edited.
if target not in df.columns:
    raise KeyError(
        f"Target column {target!r} not found. "
        f"Available columns: {df.columns.tolist()}"
    )

# Features are every column except the target.
X = df.drop(columns=[target])
y = df[target]

# Fixed seed so the random forest gives the same result on every run.
RANDOM_STATE = 42

# For classification
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.linear_model import LogisticRegression
# model1 = LogisticRegression()
# model2 = RandomForestClassifier(random_state=RANDOM_STATE)

# For regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
model1 = LinearRegression()
model2 = RandomForestRegressor(random_state=RANDOM_STATE)

# Fit both models on the full dataset; the evaluation section scores the same
# estimators with cross_val_score.
model1.fit(X, y)
model2.fit(X, y)

### 4. Model Evaluation and Validation

In [None]:
# For classification
# from sklearn.model_selection import cross_val_score
# from sklearn.metrics import accuracy_score
# scores1 = cross_val_score(model1, X, y, cv=5, scoring='accuracy')
# scores2 = cross_val_score(model2, X, y, cv=5, scoring='accuracy')
# print("Logistic Regression Accuracy:", scores1.mean())
# print("Random Forest Accuracy:", scores2.mean())

# For regression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error

# An integer cv yields unshuffled folds for regressors, so rows stored in a
# meaningful order (sorted by date, by target, ...) give unrepresentative
# folds. Shuffle with a fixed seed instead; sharing one splitter also gives
# both models exactly the same folds.
# NOTE(review): for time-ordered data a shuffled split is not appropriate --
# confirm the rows are independent before relying on these numbers.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# The scorer returns NEGATIVE mean squared error (higher is better), hence the
# sign flip when reporting MSE below.
scores1 = cross_val_score(model1, X, y, cv=cv, scoring='neg_mean_squared_error')
scores2 = cross_val_score(model2, X, y, cv=cv, scoring='neg_mean_squared_error')

# Report the mean MSE over the folds together with its spread across folds.
print("Linear Regression MSE:", -scores1.mean(), "+/-", scores1.std())
print("Random Forest MSE:", -scores2.mean(), "+/-", scores2.std())

### 5. Comparison of Models

In [None]:
# Recap of the cross-validation results: one line per model showing the
# sign-flipped mean of its fold scores.
for label, fold_scores in (("Model 1", scores1), ("Model 2", scores2)):
    print(f"{label} Score:", -fold_scores.mean())

### 6. Code Quality and Execution

In [None]:
# The code above is intended to be modular and clear, and every cell should run from top to bottom without runtime errors.

### 7. Interpretation and Presentation of Results

In [None]:
# Mean cross-validated MSE per model. The scores hold negative MSE, so the
# sign is flipped; LOWER is better.
# NOTE: if the classification option is used instead, the scores are
# accuracies -- drop the minus signs and pick the max rather than the min.
cv_mse = {
    "Linear Regression": -scores1.mean(),
    "Random Forest": -scores2.mean(),
}
best_model_name = min(cv_mse, key=cv_mse.get)

print("Model with better performance is selected based on cross-validation scores.")
print(f"Selected model: {best_model_name} (mean CV MSE = {cv_mse[best_model_name]:.4f})")