# In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset
# NOTE: 'path_to_your_dataset.csv' is a placeholder path. Replace it with the
# location of your own CSV file (for example, a CSV export of the Iris dataset).
df = pd.read_csv('path_to_your_dataset.csv')

# Show the first few rows of the dataset.
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; print it explicitly so the preview is actually displayed.
print(df.head())

# Data Exploration: report how many missing values each column contains
print("Missing Values:")
print(df.isnull().sum())

# 1. Handling missing values (imputation with the column mean for numerical columns)
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises a TypeError as soon as the frame holds text columns.
df.fillna(df.mean(numeric_only=True), inplace=True)

# 2. Remove duplicates
df.drop_duplicates(inplace=True)

# 3. Encoding categorical variables (if applicable)
# Assuming 'category_column' is a categorical feature.
# `.cat.codes` maps each category to an integer code (missing values become -1).
df['category_column'] = df['category_column'].astype('category').cat.codes

# Split the data into features (X) and target (y)
X = df.drop('target_column', axis=1)  # Replace 'target_column' with your actual target column name
y = df['target_column']

# Split the dataset into training and testing sets.
# This happens BEFORE scaling so that the scaler never sees the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Normalize/Scale numerical features
# Only feature columns are scaled: the columns are selected from X, so the
# target is left untouched (scaling a numeric class label would turn it into
# continuous floats, which a classifier rejects).
scaler = StandardScaler()
numerical_columns = X.select_dtypes(include=[np.number]).columns

# Work on explicit copies so the assignments below do not write into slices of df.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit on the training split only, then reuse the same statistics for the test
# split to avoid leaking test-set information into training.
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# Train a machine learning model (RandomForestClassifier as an example)
# A fixed random_state makes the fitted forest reproducible, matching the seeded split.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test