# In [None]:
import numpy as np
import pandas as pd
from sklearn.base import is_classifier
from sklearn.linear_model import LogisticRegression  # Or any other algorithm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split

# 1. Load your data (replace with your actual data loading)
try:
    data = pd.read_csv("your_data.csv") # Replace "your_data.csv" with your file
except FileNotFoundError:
    print("Error: File not found. Please check the file path.")
    # Stop with a non-zero status so callers can tell the run failed. The
    # bare exit() helper is injected by the `site` module for interactive
    # sessions and, called without arguments, reports success (status 0).
    raise SystemExit(1)


# 2. Handle Null Values
def handle_null_values(df):
    """Impute missing values in ``df`` column by column and return it.

    Numeric columns (``np.number`` dtypes) have their nulls replaced with the
    column mean; ``object`` columns have their nulls replaced with the most
    frequent value (the first mode). Columns of any other dtype are left
    untouched, as are object columns that contain no non-null value at all.

    The columns are assigned back onto ``df`` itself, and ``df`` is also
    returned, so ``df = handle_null_values(df)`` keeps working.

    Alternatives if this imputation strategy is not wanted:
      * drop incomplete rows instead: ``df = df.dropna()``
      * impute numeric columns with the median: ``df[col].median()``

    Args:
        df: pandas DataFrame to clean.

    Returns:
        ``df`` with nulls imputed wherever a fill value could be computed.
    """
    # Assign the filled column back rather than calling
    # ``df[col].fillna(..., inplace=True)``: that form mutates a temporary
    # Series, which pandas deprecates and which no longer updates ``df`` once
    # Copy-on-Write is active.
    for col in df.select_dtypes(include=np.number).columns:
        df[col] = df[col].fillna(df[col].mean())

    for col in df.select_dtypes(include='object').columns:  # or 'category'
        modes = df[col].mode()  # mode() returns a Series, possibly empty
        if modes.empty:
            # Entirely-null column: there is no observed value to impute with.
            continue
        df[col] = df[col].fillna(modes.iloc[0])
    return df

data = handle_null_values(data)


# 3. Prepare Data
# NOTE: handle_null_values imputes object (string) columns but does not encode
# them; scikit-learn estimators need numeric features, so encode any such
# columns before fitting.
X = data.drop("target_variable", axis=1)  # Features (independent variables)
y = data["target_variable"]  # Target variable (dependent variable)


# 4. Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)  # 80% training, 20% testing


# 5. Training the Algorithm (Logistic Regression as example)
model = LogisticRegression(solver='liblinear', random_state=42)  # Or any other algorithm
model.fit(X_train, y_train)  # Train the model


# 6. Testing the Algorithm (Making Predictions)
y_pred = model.predict(X_test)  # Predict on the test set


# 7. Performance Metrics
# Choose the metric family from the estimator, not from the dtype of y: class
# labels are commonly stored as integers (e.g. 0/1), and a dtype check would
# then score a classifier with regression metrics.
if is_classifier(model):
    # Classification metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted') # or 'micro', 'macro'
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
else:
    # Regression metrics (used when the model above is swapped for a regressor)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared: {r2}")
    print(f"Mean Absolute Error: {mae}")

# In [None]:

 
import numpy as np

def detect_outliers_iqr(data, multiplier=1.5):
    """
    Detects outliers using the IQR method.

    A value is an outlier when it lies strictly below
    ``Q1 - multiplier * IQR`` or strictly above ``Q3 + multiplier * IQR``,
    where Q1/Q3 are the 25th/75th percentiles of ``data`` and
    ``IQR = Q3 - Q1``.

    Args:
        data: A list or numpy array of numerical data.
        multiplier: Width of the fences in units of the IQR. Defaults to
            1.5, the value this function always used before the parameter
            existed.

    Returns:
        A list of outliers, in their original order. Empty input yields an
        empty list.
    """
    values = np.asarray(data)
    if values.size == 0:
        # Percentiles of an empty sample are undefined; nothing can be an
        # outlier, so answer directly instead of letting numpy fail.
        return []

    # One call computes both quartiles.
    q1, q3 = np.percentile(values, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - multiplier * iqr
    upper_bound = q3 + multiplier * iqr
    # Iterate the caller's own sequence so the returned elements keep their
    # original types (e.g. plain ints for a list of ints).
    return [x for x in data if x < lower_bound or x > upper_bound]

# Demo: a small sample clustered around 10-15 with one extreme value (100).
# NOTE: this rebinds the module-level name `data`, which the first cell uses
# for the loaded DataFrame.
data = [
    10, 12, 15, 11, 13,
    100, 12, 14, 11, 13,
]
outliers = detect_outliers_iqr(data)
print("Outliers:", outliers)