# Explanation:
Static Data:

The data dictionary contains features such as age, salary, experience, city, and purchased (binary target for classification).
Initialization:

The MachineLearningIntro class initializes with the DataFrame created from the static data and sets up the necessary preprocessing tools.
Data Preprocessing:

The preprocess_data method encodes categorical variables (city), scales numerical features (age, salary, experience), and splits the data into training and test sets for both regression and classification tasks.
Model Training and Evaluation:

The train_regression_model method trains a Linear Regression model.
The evaluate_regression_model method evaluates the regression model using Mean Squared Error.
The train_classification_model method trains a Logistic Regression model.
The evaluate_classification_model method evaluates the classification model using Accuracy and Confusion Matrix.
Prediction:

The predict method allows making predictions on new data using the trained models. The new data is preprocessed in the same way as the training data

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Define static data
data = {
    'age': [25, 32, 45, 28, 34],
    'salary': [50000, 60000, 80000, 45000, 70000],
    'experience': [2, 7, 10, 4, 6],
    'city': ['New York', 'Los Angeles', 'New York', 'Chicago', 'Los Angeles'],
    'purchased': [0, 1, 0, 0, 1]  # Binary target variable for classification
}

# Convert static data to DataFrame
df = pd.DataFrame(data)

# Define the machine learning class
class MachineLearningIntro:
    def __init__(self, dataframe):
        self.df = dataframe
        self.label_encoder = LabelEncoder()
        self.scaler = StandardScaler()
    
    def preprocess_data(self):
        # Encode categorical variables
        self.df['city_encoded'] = self.label_encoder.fit_transform(self.df['city'])
        
        # Define features and targets
        self.X = self.df[['age', 'salary', 'experience', 'city_encoded']]
        self.y_reg = self.df['salary']  # Target for regression
        self.y_clf = self.df['purchased']  # Target for classification
        
        # Scale numerical features
        self.X_scaled = self.scaler.fit_transform(self.X)
        
        # Split data into training and test sets
        self.X_train_reg, self.X_test_reg, self.y_train_reg, self.y_test_reg = train_test_split(self.X_scaled, self.y_reg, test_size=0.2, random_state=42)
        self.X_train_clf, self.X_test_clf, self.y_train_clf, self.y_test_clf = train_test_split(self.X_scaled, self.y_clf, test_size=0.2, random_state=42)
    
    def train_regression_model(self):
        # Train a Linear Regression model
        self.reg_model = LinearRegression()
        self.reg_model.fit(self.X_train_reg, self.y_train_reg)
    
    def evaluate_regression_model(self):
        # Evaluate the Linear Regression model
        self.y_pred_reg = self.reg_model.predict(self.X_test_reg)
        mse = mean_squared_error(self.y_test_reg, self.y_pred_reg)
        print(f"Mean Squared Error (Regression): {mse}")
    
    def train_classification_model(self):
        # Train a Logistic Regression model
        self.clf_model = LogisticRegression()
        self.clf_model.fit(self.X_train_clf, self.y_train_clf)
    
    def evaluate_classification_model(self):
        # Evaluate the Logistic Regression model
        self.y_pred_clf = self.clf_model.predict(self.X_test_clf)
        acc = accuracy_score(self.y_test_clf, self.y_pred_clf)
        cm = confusion_matrix(self.y_test_clf, self.y_pred_clf)
        print(f"Accuracy (Classification): {acc}")
        print(f"Confusion Matrix (Classification):\n{cm}")
    
    def predict(self, new_data):
        # Scale the new data
        new_data_scaled = self.scaler.transform(new_data)
        reg_prediction = self.reg_model.predict(new_data_scaled)
        clf_prediction = self.clf_model.predict(new_data_scaled)
        return reg_prediction, clf_prediction

# Initialize the machine learning class
ml_intro = MachineLearningIntro(df)

# Preprocess data
ml_intro.preprocess_data()

# Train and evaluate regression model
ml_intro.train_regression_model()
ml_intro.evaluate_regression_model()

# Train and evaluate classification model
ml_intro.train_classification_model()
ml_intro.evaluate_classification_model()

# Predict with new data
new_data = pd.DataFrame({
    'age': [30, 40],
    'salary': [65000, 80000],
    'experience': [5, 8],
    'city': ['Chicago', 'New York']
})

# Encode new data
new_data['city_encoded'] = ml_intro.label_encoder.transform(new_data['city'])
new_data_processed = new_data[['age', 'salary', 'experience', 'city_encoded']]

# Predictions
reg_pred, clf_pred = ml_intro.predict(new_data_processed)
print(f"Regression Predictions: {reg_pred}")
print(f"Classification Predictions: {clf_pred}")


Mean Squared Error (Regression): 0.0
Accuracy (Classification): 0.0
Confusion Matrix (Classification):
[[0 0]
 [1 0]]
Regression Predictions: [65000. 80000.]
Classification Predictions: [0 0]
