In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.stats import randint
import joblib

In [None]:

# Step 1: Read the raw dataset from disk into a DataFrame
file_path = "data.csv"  # Point this at the CSV file you want to train on
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Step 2: Explore the dataset
# Quick overview of the loaded frame: its dimensions, a per-column summary,
# and the first rows.
print("Dataset Shape:", data.shape)
print("Dataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
data.info()
print("First few rows:")
print(data.head())


In [None]:
# Step 3: Preprocess the data
# Handle missing values
# 1) Numeric columns: replace each missing value with that column's mean.
data.fillna(data.mean(numeric_only=True), inplace=True)
# 2) Anything still missing (e.g. non-numeric columns) takes the previous row's
#    value. DataFrame.ffill() is the supported replacement for the deprecated
#    fillna(method='ffill') spelling.
data.ffill(inplace=True)
# NOTE(review): a forward fill cannot fill gaps in the very first row(s);
# confirm the dataset has none, or add an explicit fallback if it does.

In [None]:
# Integer-encode every object-dtype column, keeping one fitted LabelEncoder per
# column (keyed by column name) so the mapping can be looked up later.
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    # Fit on the column's current values and overwrite it with the integer codes.
    data[name] = encoder.fit_transform(data[name])


In [None]:
# Separate features (X) and target (y)
X = data.drop("Role", axis=1)  # "Role" is the target column; every other column is a feature
y = data["Role"]

# Step 4: Split the data into training and testing sets
# The split happens BEFORE scaling so that the scaler never sees the test rows;
# fitting it on the full dataset would leak test-set statistics into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
# Learn mean/variance from the training rows only, then apply that same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep X as a standardized array (as it was before this change) for any later
# code that uses it, now scaled with the training-set statistics.
X = scaler.transform(X)


In [None]:


# Step 5: Tune a Random Forest with a randomized hyperparameter search
rf_model = RandomForestClassifier(random_state=42)

# Search space: entries given as scipy `randint` objects are distributions to
# sample from; entries given as lists are the explicit candidate values.
param_dist = dict(
    n_estimators=randint(100, 500),
    max_depth=[10, 20, None],
    min_samples_split=randint(2, 20),
    min_samples_leaf=randint(1, 20),
    bootstrap=[True, False],
)

# Evaluate 50 sampled configurations with 3-fold cross-validation, scored by
# accuracy, using all available cores; the seed fixes the sampled candidates.
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_dist,
    n_iter=50,
    cv=3,
    scoring='accuracy',
    verbose=2,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)

# Keep the winning estimator and its hyperparameters for the steps that follow
best_model = random_search.best_estimator_
best_params = random_search.best_params_
print(f"Best Parameters: {best_params}")

# Step 6: Score the tuned model on the held-out test split
y_pred_tuned = best_model.predict(X_test)
accuracy_tuned = accuracy_score(y_test, y_pred_tuned)

# Headline accuracy first, then the per-class report and the confusion matrix,
# each printed under its own heading.
print(f"Tuned Model Accuracy: {accuracy_tuned:.2f}")
print("\nClassification Report:", classification_report(y_test, y_pred_tuned), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred_tuned), sep="\n")

# Step 7: Save the model for future use
joblib.dump(best_model, "fine_tuned_career_model.pkl")
print("Fine-tuned model saved as 'fine_tuned_career_model.pkl'")

# The model was trained on label-encoded, standardized features, so the fitted
# scaler and per-column label encoders are saved alongside it; without them new
# raw data cannot be prepared the same way, nor encoded values mapped back.
joblib.dump(
    {"scaler": scaler, "label_encoders": label_encoders},
    "fine_tuned_career_preprocessing.pkl",
)
print("Preprocessing objects saved as 'fine_tuned_career_preprocessing.pkl'")


4