In [6]:
#Email Spam Detection

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("spam.csv", encoding="latin-1")[["v1", "v2"]]
df.columns = ["label", "message"]

# Label encoding
df["label"] = df["label"].map({"ham": 0, "spam": 1})

# Split data
X_train, X_test, y_train, y_test = train_test_split(df["message"],
                                                    df["label"],
                                                    test_size=0.2,
                                                    random_state=42)

# Text vectorization
tfidf = TfidfVectorizer(stop_words="english")
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Model
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# Predictions
y_pred = model.predict(X_test_tfidf)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.9524663677130045
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       965
           1       0.97      0.67      0.79       150

    accuracy                           0.95      1115
   macro avg       0.96      0.83      0.88      1115
weighted avg       0.95      0.95      0.95      1115



In [4]:
# Car Price Prediction Model

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score


# SAMPLE DATASET
data = {
    "age": [1, 3, 5, 7, 9, 10],
    "mileage": [10000, 30000, 50000, 70000, 90000, 110000],
    "fuel_type": [1, 1, 0, 0, 1, 0],     # 1 = Petrol, 0 = Diesel
    "price": [800000, 650000, 500000, 380000, 300000, 250000]
}

df = pd.DataFrame(data)


# FEATURES & LABEL
X = df[["age", "mileage", "fuel_type"]]
y = df["price"]


# TRAIN TEST SPLIT
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# MODEL TRAINING
model = LinearRegression()
model.fit(X_train, y_train)

# PREDICTION
y_pred = model.predict(X_test)

# MODEL EVALUATION
print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))


# PREDICT NEW CAR PRICE

new_car = [[4, 45000, 1]]
predicted_price = model.predict(new_car)
print("Predicted Car Price:", int(predicted_price[0]), "INR")


Mean Absolute Error: 15000.000000338245
R2 Score: 0.9555555555501437
Predicted Car Price: 624999 INR


In [15]:
#SALES PREDICTION USING MACHINE LEARNING

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# --- Load Dataset ---
file_path = "Advertising.csv"   # Change to your dataset file
df = pd.read_csv(file_path)

print("Dataset loaded:", df.shape)

# --- Features (advertising, price, discount) ---
X = df.drop("Sales", axis=1)   # depends on your dataset column names
y = df["Sales"]

# --- Split Dataset ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Train Model ---
model = LinearRegression()
model.fit(X_train, y_train)

# --- Predict ---
y_pred = model.predict(X_test)

# --- Performance ---
print("\nSales Prediction Results")
print("MSE:", mean_squared_error(y_test, y_pred))

# --- Test on custom data ---
sample = X_test.iloc[0:1]
print("\nPredicted Sales for sample:", model.predict(sample))


Dataset loaded: (200, 5)

Sales Prediction Results
MSE: 3.1990044685889063

Predicted Sales for sample: [16.41227699]
