In [26]:
# Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load Dataset
df = pd.read_csv("/content/34c.csv")  # Replace with actual file path

# Resume Matching

# Combine skills into a single text field
resume_texts = df[['Skill 1', 'Skill 2']].astype(str).agg(' '.join, axis=1)

tfidf_vectorizer = TfidfVectorizer()
X_resume = tfidf_vectorizer.fit_transform(resume_texts)

y_resume = df['Suggested Jobs']  # Target variable

# Split Data
X_resume_train, X_resume_test, y_resume_train, y_resume_test = train_test_split(
    X_resume, y_resume, test_size=0.2, random_state=42)

# Train SVM Model
resume_model = SVC(kernel='linear', C=1.0)
resume_model.fit(X_resume_train, y_resume_train)

# Evaluate SVM Model
y_resume_pred = resume_model.predict(X_resume_test)
print("Resume Matching Accuracy:", accuracy_score(y_resume_test, y_resume_pred))
print("Resume Matching Classification Report:\n", classification_report(y_resume_test, y_resume_pred))

# ----- Fraud Detection -----

# Ensure necessary columns exist
required_columns = ['Age Limit ', 'Gender', 'Location', 'State', 'Address']
for col in required_columns:
    if col not in df.columns:
        raise ValueError(f"Column '{col}' is missing from the dataset!")

# Label Encoding for categorical features
le = LabelEncoder()
for col in ['Gender', 'Location', 'State', 'Address']:
    df[col] = le.fit_transform(df[col].astype(str))

# Convert 'Age Limit ' to numeric, handling potential errors
# Try converting to numeric, coerce errors to NaN
df['Age Limit '] = pd.to_numeric(df['Age Limit '], errors='coerce')
# If there are NaNs, fill them with a suitable value (e.g., mean, median)
df['Age Limit '].fillna(df['Age Limit '].mean(), inplace=True)


# Create synthetic 'Is_Fraudulent' column (example rule)
df['Is_Fraudulent'] = 0  # Initialize with 0 (not fraudulent)
df.loc[df['Age Limit '] > 90, 'Is_Fraudulent'] = 1  # Example rule: Age Limit > 90

# Fraud Detection Model
X_fraud = df[required_columns]  # Features
y_fraud = df['Is_Fraudulent']  # Target
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(X_fraud, y_fraud, test_size=0.2, random_state=42)

fraud_model = XGBClassifier(n_estimators=100, random_state=42, use_label_encoder=False, eval_metric='mlogloss')
fraud_model.fit(X_fraud_train, y_fraud_train)
y_fraud_pred = fraud_model.predict(X_fraud_test)

print("Fraud Detection Accuracy:", accuracy_score(y_fraud_test, y_fraud_pred))
print("Fraud Detection Classification Report:\n", classification_report(y_fraud_test, y_fraud_pred))

Resume Matching Accuracy: 0.49230769230769234
Resume Matching Classification Report:
                                         precision    recall  f1-score   support

       Art and Painting freelance work       0.62      0.62      0.62        16
           Book Binding freelance work       0.45      0.75      0.56        12
        Content Writing freelance work       0.50      1.00      0.67        13
            Copywriting freelance work       0.56      1.00      0.71        15
          Data Analysis freelance work       0.00      0.00      0.00        13
             Data Entry freelance work       0.60      0.47      0.53        19
      Digital Marketing freelance work       0.67      0.47      0.55        17
       Freelance Coding freelance work       0.43      0.38      0.40         8
         Graphic Design freelance work       0.75      0.25      0.38        12
      Handicraft Making freelance work       0.50      0.38      0.43        16
           Illustration freelance

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age Limit '].fillna(df['Age Limit '].mean(), inplace=True)
Parameters: { "use_label_encoder" } are not used.

