In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv('demographic_employment_data.csv')

# Display the first few rows of the dataset
print(data.head())

# Preprocess the data
# Convert categorical columns to numerical using one-hot encoding
data = pd.get_dummies(data, columns=['Gender', 'Country', 'City', 'Occupation', 'Marital Status', 'Education Level', 'Employment Status'])

# Define the feature columns and the target column
# Assuming 'Employment Status_Employed' is the target for logistic regression
X = data.drop(columns=['ID', 'Name', 'Employment Status_Employed'])
y = data['Employment Status_Employed']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)


   ID           Name  Age  Gender    Country      City Occupation     Salary  \
0   1  Alice Johnson   60    Male  Australia    London     Artist   50792.18   
1   2      Chris Lee   44  Female         UK    Sydney     Lawyer  119924.18   
2   3       John Doe   28   Other        USA    Berlin     Lawyer   47043.78   
3   4  Michael Brown   21    Male        USA  New York     Artist   63998.65   
4   5  Alice Johnson   38    Male     Canada  New York   Engineer  100520.53   

  Marital Status    Education Level  Years of Experience Employment Status  
0         Single    Master's Degree                   22     Self-employed  
1       Divorced    Master's Degree                   28        Unemployed  
2        Married                PhD                    1        Unemployed  
3         Single  Bachelor's Degree                   28     Self-employed  
4       Divorced  Bachelor's Degree                   15        Unemployed  
Accuracy: 1.0
Classification Report:
              precis

In [2]:
import pandas as pd
import random

# Load the dataset
data = pd.read_csv('demographic_employment_data.csv')

# Add employment status for 12 months
for month in range(1, 13):
    data[f'Employment Status Month {month}'] = [random.choice(['Employed', 'Unemployed', 'Self-employed']) for _ in range(len(data))]

# Save the modified dataset
data.to_csv('demographic_employment_data_12_months.csv', index=False)
print("Modified dataset saved as 'demographic_employment_data_12_months.csv'")


Modified dataset saved as 'demographic_employment_data_12_months.csv'
