In [13]:
##Classification problem
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
df = pd.read_csv("Personality_Dataset.csv")  # Replace with your actual CSV filename

# Debugging: Check initial data
print("🔎 Raw Data Preview:")
print(df.head())
print("🧠 Personality values:", df['Personality'].unique())

# Step 2: Clean the 'Personality' column
df['Personality'] = df['Personality'].astype(str).str.strip().str.lower()

# Keep only introvert/extrovert entries
df = df[df['Personality'].isin(['introvert', 'extrovert'])]

# Step 3: Map to numeric
df['Personality'] = df['Personality'].map({'introvert': 0, 'extrovert': 1})

# Step 4: Drop missing values
df = df.dropna()

# Step 5: Separate X and y
X = df.drop('Personality', axis=1)
y = df['Personality']

# Step 6: Encode categorical variables
X_encoded = pd.get_dummies(X, drop_first=True)

# Step 7: Ensure non-empty dataset
if len(X_encoded) == 0 or len(y) == 0:
    raise ValueError("❌ ERROR: No data left after preprocessing. Please check your CSV values.")

# Step 8: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Step 9: Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Step 10: Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("✅ Model trained successfully!")
print("📊 Accuracy on test set:", accuracy)


🔎 Raw Data Preview:
   Unnamed: 0  Time_spent_Alone Stage_fear  Social_event_attendance  \
0           0               4.0         No                      4.0   
1           1               9.0        Yes                      0.0   
2           2               9.0        Yes                      1.0   
3           3               0.0         No                      6.0   
4           4               3.0         No                      9.0   

   Going_outside Drained_after_socializing  Friends_circle_size  \
0            6.0                        No                 13.0   
1            0.0                       Yes                  0.0   
2            2.0                       Yes                  5.0   
3            7.0                        No                 14.0   
4            4.0                        No                  8.0   

   Post_frequency  Personality  
0             5.0            1  
1             3.0            0  
2             2.0            0  
3             8.0 

ValueError: ❌ ERROR: No data left after preprocessing. Please check your CSV values.

In [17]:
##Regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
df = pd.read_csv("sales.csv")

# Drop unnecessary columns
df = df.drop(columns=["Unnamed: 0", "Unnamed: 0.1", "Date"])

# Drop rows with missing values
df = df.dropna()

# Define target and features
X = df.drop("Total Amount", axis=1)  # Features
y = df["Total Amount"]               # Target

# One-hot encode categorical columns (if any)
X_encoded = pd.get_dummies(X, drop_first=True)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("✅ Model trained successfully!")
print(f"📉 Mean Squared Error: {mse:.2f}")
print(f"📈 R² Score: {r2:.2f}")


✅ Model trained successfully!
📉 Mean Squared Error: 41883.55
📈 R² Score: 0.86
