In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
file_path = "Updated_Indian_Army_Pistols_Corrected.csv"  # Update path if needed
df = pd.read_csv(file_path)

# Handle missing values
imputer = SimpleImputer(strategy='most_frequent')  # For categorical features
df[df.select_dtypes(include=['object']).columns] = imputer.fit_transform(df.select_dtypes(include=['object']))

imputer_numeric = SimpleImputer(strategy='median')  # For numerical features
df[df.select_dtypes(include=['number']).columns] = imputer_numeric.fit_transform(df.select_dtypes(include=['number']))

# Features and Label
numeric_features = ['Barrel Length', 'Caliber', 'Magazine Capacity', 'Weight', 'Price']
categorical_features = ['Action Type', 'Sight Type', 'Military Use']
ordinal_features = {
    'Reliability': {'Poor': 0, 'Average': 1, 'Good': 2, 'Excellent': 3},
    'Recoil Level': {'Low': 0, 'Medium': 1, 'High': 2},
    'Concealability': {'Low': 0, 'Medium': 1, 'High': 2},
}

label = 'Best Use Case'  # Ensure this column exists

# Encode ordinal features
for feature, mapping in ordinal_features.items():
    if feature in df.columns:
        df[feature] = df[feature].map(mapping)

# Encode the label
label_encoder = LabelEncoder()
df[label] = label_encoder.fit_transform(df[label])

# One-hot encode categorical features
df = pd.get_dummies(df, columns=categorical_features)

# Standardize numeric features
scaler = StandardScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# Feature selection
X = df[numeric_features + list(ordinal_features.keys()) + list(df.columns[df.columns.str.startswith(tuple(categorical_features))])]
y = df[label]

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Model Training
model = RandomForestClassifier(n_estimators=500, random_state=42, class_weight="balanced", max_depth=15)
model.fit(X_train, y_train)

# Evaluate Model
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Function to Recommend Best Pistol for a Mission
def recommend_pistol(mission_type, pistol_data):
    if mission_type not in label_encoder.classes_:
        return "Invalid Mission Type. Please enter a valid mission."
    
    mission_code = label_encoder.transform([mission_type])[0]
    filtered_pistols = pistol_data[pistol_data[label] == mission_code]

    if filtered_pistols.empty:
        return f"No suitable pistol found for {mission_type}."

    best_pistol = filtered_pistols.iloc[0]['Pistol Name']
    return f"Recommended Pistol for {mission_type}: {best_pistol}"

# User Input
mission = input("Enter the mission type: ")
pistol_suggestion = recommend_pistol(mission, df)
print(pistol_suggestion)


Model Accuracy: 15.00%


Enter the mission type:  Training


Recommended Pistol for Training: Glock 17


In [59]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import RandomOverSampler
from sklearn.feature_selection import SelectKBest, f_classif
import xgboost as xgb
import warnings

warnings.filterwarnings("ignore")

# Load dataset
file_path = "Updated_Indian_Army_Pistols_Corrected.csv"
df = pd.read_csv(file_path)

# Define essential columns
pistol_name_column = "Pistol Name"
label = "Best Use Case"

# **Ensure essential columns exist**
if pistol_name_column not in df.columns or label not in df.columns:
    raise KeyError(f"Missing essential columns in dataset. Found: {df.columns.tolist()}")

# **Backup "Pistol Name" separately to restore later**
pistol_names = df[pistol_name_column].copy()

# Define feature categories
numeric_features = ['Barrel Length', 'Caliber', 'Magazine Capacity', 'Weight', 'Price']
categorical_features = ['Action Type', 'Sight Type', 'Military Use']
ordinal_features = {
    'Recoil Level': {'Low': 0, 'Medium': 1, 'High': 2},
    'Concealability': {'Low': 0, 'Medium': 1, 'High': 2},
}

# Handle missing values
imputer = SimpleImputer(strategy='most_frequent')
df[numeric_features] = imputer.fit_transform(df[numeric_features])

# Encode ordinal features
for feature, mapping in ordinal_features.items():
    df[feature] = df[feature].map(mapping)

# Encode label
label_encoder = LabelEncoder()
df[label] = label_encoder.fit_transform(df[label])

# One-hot encode categorical features
df = pd.get_dummies(df, columns=categorical_features)

# Standardize numeric features
scaler = StandardScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# **Ensure all features are numeric (excluding "Pistol Name")**
df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

# **DEBUG: Print columns before filtering**
print("Columns before filtering:", df.columns.tolist())

# **Keep "Pistol Name" before filtering low-variance features**
df[pistol_name_column] = pistol_names

# **Remove low-variance features but KEEP "Pistol Name"**
df_filtered = df.loc[:, df.nunique() > 1]
df_filtered[pistol_name_column] = pistol_names  # Restore after filtering

# **DEBUG: Print columns after filtering**
print("Columns after filtering:", df_filtered.columns.tolist())

# **Ensure essential columns exist**
if pistol_name_column in df_filtered.columns and label in df_filtered.columns:
    X = df_filtered.drop(columns=[label, pistol_name_column], errors='ignore')
    y = df_filtered[label]
else:
    raise KeyError(f"Missing essential columns AFTER preprocessing. Found: {df_filtered.columns.tolist()}")

# Handle class imbalance
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# Feature selection
k_best = min(15, X_resampled.shape[1])
selector = SelectKBest(score_func=f_classif, k=k_best)
X_selected = selector.fit_transform(X_resampled, y_resampled)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_selected, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Train model with XGBoost
model = xgb.XGBClassifier(n_estimators=500, max_depth=10, learning_rate=0.1, eval_metric='mlogloss', random_state=42)
model.fit(X_train, y_train)

# Evaluate model
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Optimized Model Accuracy: {accuracy * 100:.2f}%")

# Function to Recommend Best Pistol
def recommend_pistol(mission_type, pistol_data, label_encoder):
    if mission_type not in label_encoder.classes_:
        return "Invalid Mission Type. Please enter a valid mission."
    
    mission_code = label_encoder.transform([mission_type])[0]
    filtered_pistols = pistol_data[pistol_data[label] == mission_code]
    
    if filtered_pistols.empty:
        return f"No suitable pistol found for {mission_type}."
    
    best_pistol = filtered_pistols.iloc[0][pistol_name_column] if pistol_name_column in filtered_pistols.columns else "No Name Available"
    
    return f"Recommended Pistol for {mission_type}: {best_pistol}"

# Example Usage
if __name__ == "__main__":
    mission = input("Enter the mission type: ")
    pistol_suggestion = recommend_pistol(mission, df, label_encoder)
    print(pistol_suggestion)


Columns before filtering: ['Pistol Name', 'Barrel Length', 'Caliber', 'Magazine Capacity', 'Weight', 'Reliability', 'Recoil Level', 'Concealability', 'Price', 'Best Use Case', 'Action Type_Double Action', 'Action Type_Semi-Automatic', 'Action Type_Single Action', 'Action Type_Striker-Fired', 'Sight Type_Iron Sights', 'Sight Type_Laser Sight', 'Sight Type_Night Vision', 'Sight Type_Optical', 'Military Use_No', 'Military Use_Yes']
Columns after filtering: ['Pistol Name', 'Barrel Length', 'Caliber', 'Magazine Capacity', 'Weight', 'Reliability', 'Recoil Level', 'Price', 'Best Use Case', 'Action Type_Double Action', 'Action Type_Semi-Automatic', 'Action Type_Single Action', 'Action Type_Striker-Fired', 'Sight Type_Iron Sights', 'Sight Type_Laser Sight', 'Sight Type_Night Vision', 'Sight Type_Optical', 'Military Use_No', 'Military Use_Yes']
Optimized Model Accuracy: 41.67%


Enter the mission type:  Training


Recommended Pistol for Training: Glock 17


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectKBest, f_classif
import xgboost as xgb
import warnings
import pickle

warnings.filterwarnings("ignore")

# Load dataset
file_path = "Updated_Indian_Army_Pistols_Corrected.csv"
df = pd.read_csv(file_path)

# Define essential columns
pistol_name_column = "Pistol Name"
label = "Best Use Case"

# **Ensure essential columns exist**
if pistol_name_column not in df.columns or label not in df.columns:
    raise KeyError(f"Missing essential columns in dataset. Found: {df.columns.tolist()}")

# **Backup "Pistol Name" separately to restore later**
pistol_names = df[pistol_name_column].copy()

# Define feature categories
numeric_features = ['Barrel Length', 'Caliber', 'Magazine Capacity', 'Weight', 'Price']
categorical_features = ['Action Type', 'Sight Type', 'Military Use']
ordinal_features = {
    'Recoil Level': {'Low': 0, 'Medium': 1, 'High': 2},
    'Concealability': {'Low': 0, 'Medium': 1, 'High': 2},
}

# Handle missing values
imputer = SimpleImputer(strategy='mean')
df[numeric_features] = imputer.fit_transform(df[numeric_features])

# Encode ordinal features
for feature, mapping in ordinal_features.items():
    df[feature] = df[feature].map(mapping)

# Encode label
label_encoder = LabelEncoder()
df[label] = label_encoder.fit_transform(df[label])

# One-hot encode categorical features
df = pd.get_dummies(df, columns=categorical_features)

# Standardize numeric features
scaler = StandardScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

# **Ensure all features are numeric (excluding "Pistol Name")**
df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

# **Keep "Pistol Name" before filtering low-variance features**
df[pistol_name_column] = pistol_names

# **Remove low-variance features but KEEP "Pistol Name"**
df_filtered = df.loc[:, df.nunique() > 1]
df_filtered[pistol_name_column] = pistol_names  # Restore after filtering

# **Ensure essential columns exist**
if pistol_name_column in df_filtered.columns and label in df_filtered.columns:
    X = df_filtered.drop(columns=[label, pistol_name_column], errors='ignore')
    y = df_filtered[label]
else:
    raise KeyError(f"Missing essential columns AFTER preprocessing. Found: {df_filtered.columns.tolist()}")

# Handle class imbalance with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Feature selection (Increase k to capture more important features)
k_best = min(100, X_resampled.shape[1])
selector = SelectKBest(score_func=f_classif, k=k_best)
X_selected = selector.fit_transform(X_resampled, y_resampled)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_selected, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)

# Optimized XGBoost model with fine-tuned parameters
model = xgb.XGBClassifier(
    n_estimators=1500, 
    max_depth=15, 
    learning_rate=0.03, 
    subsample=0.9, 
    colsample_bytree=0.9, 
    eval_metric='mlogloss', 
    use_label_encoder=False, 
    random_state=42
)
model.fit(X_train, y_train)

# Evaluate model
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Optimized Model Accuracy: {accuracy * 100:.2f}%")

# Function to Recommend Best Pistol
def recommend_pistol(mission_type, pistol_data, label_encoder):
    if mission_type not in label_encoder.classes_:
        return "Invalid Mission Type. Please enter a valid mission."
    
    mission_code = label_encoder.transform([mission_type])[0]
    filtered_pistols = pistol_data[pistol_data[label] == mission_code]
    
    if filtered_pistols.empty:
        return f"No suitable pistol found for {mission_type}."
    
    best_pistol = filtered_pistols.iloc[0][pistol_name_column] if pistol_name_column in filtered_pistols.columns else "No Name Available"
    
    return f"Recommended Pistol for {mission_type}: {best_pistol}"

# Example Usage
if __name__ == "__main__":
    mission = input("Enter the mission type: ")
    pistol_suggestion = recommend_pistol(mission, df, label_encoder)
    print(pistol_suggestion)

# Save model and preprocessing objects
with open("pistol_recommender.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

with open("label_encoder.pkl", "wb") as le_file:
    pickle.dump(label_encoder, le_file)

with open("feature_selector.pkl", "wb") as fs_file:
    pickle.dump(selector, fs_file)

with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Model and preprocessing objects saved successfully.")


Optimized Model Accuracy: 45.83%


Enter the mission type:  Surgical Strike


Recommended Pistol for Surgical Strike: Colt M1911
Model and preprocessing objects saved successfully.
