In [25]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

# Load dataset from CSV.
# The path is relative, so it is resolved against the current working directory.
file_name = "lungcancer.csv"
data = pd.read_csv(file_name)

# Drop identifier columns when present. errors="ignore" removes whichever of
# the two exists, so a file that carries only one of them does not pass a
# (presumably non-numeric) column on to the scaler below.
data = data.drop(columns=["Name", "Surname"], errors="ignore")

# Rename the abbreviated source columns to readable names.
column_mapping = {"AreaQ": "Air Quality", "Alkhol": "Alcohol"}
data = data.rename(columns=column_mapping)

# Split features and target variable; "Result" is the label column.
X = data.drop("Result", axis=1)
y = data["Result"]

# Keep the feature column order so inference can rebuild an identical frame.
feature_names = X.columns.tolist()

# Standardize the features.
# NOTE(review): the scaler is fit on the full dataset before the split below,
# so X_test is not a clean hold-out for evaluation. Nothing here scores on
# X_test; fit on the training split only if an evaluation step is added.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Train the Random Forest classifier on the training split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save the trained model, scaler, and feature names.
# Each file is opened in a `with` block so the handle is flushed and closed
# deterministically instead of being left for the garbage collector.
with open("lung_cancer_model.pkl", "wb") as artifact_file:
    pickle.dump(model, artifact_file)
with open("lung_cancer_scaler.pkl", "wb") as artifact_file:
    pickle.dump(scaler, artifact_file)
with open("lung_cancer_features.pkl", "wb") as artifact_file:
    pickle.dump(feature_names, artifact_file)


# Load the saved model, scaler, and feature names.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# The files read here are the ones written immediately above.
with open("lung_cancer_model.pkl", "rb") as artifact_file:
    model = pickle.load(artifact_file)
with open("lung_cancer_scaler.pkl", "rb") as artifact_file:
    scaler = pickle.load(artifact_file)
with open("lung_cancer_features.pkl", "rb") as artifact_file:
    feature_names = pickle.load(artifact_file)

def _prompt_float(prompt):
    """Ask for input with *prompt* until the reply parses as a finite float."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number; please try again.")
            continue
        # float() accepts "nan" and "inf"; reject them here rather than let
        # them reach the scaler and model.
        if not np.isfinite(value):
            print(f"'{raw}' is not a finite number; please try again.")
            continue
        return value


def predict_lung_cancer():
    """Interactively read four values and print the model's prediction.

    Reads age, cigarettes per day, air quality and alcohol consumption from
    standard input, scales them with the module-level ``scaler`` and
    classifies them with the module-level ``model``. The values are matched
    to ``feature_names`` by position, in the order they are prompted for.
    The ranges shown in the prompts are informational only and are not
    enforced.

    Non-numeric or non-finite entries are rejected and the prompt repeats.
    A predicted label of ``1`` is reported as "Lung Cancer Detected"; any
    other label as "No Lung Cancer Detected".

    Raises:
        ValueError: if ``feature_names`` does not contain exactly four names.
    """
    expected_inputs = 4
    # Fail before prompting if the saved feature list cannot hold the four
    # positional inputs collected below.
    if len(feature_names) != expected_inputs:
        raise ValueError(
            f"Expected {expected_inputs} feature names, "
            f"got {len(feature_names)}: {feature_names}"
        )

    print("\nEnter your details for lung cancer prediction:")
    age = _prompt_float("Age: ")
    smokes = _prompt_float("Cigarettes per day: ")
    air_quality = _prompt_float("Air Quality (1-10): ")
    alcohol = _prompt_float("Alcohol consumption (1-10): ")

    # Build a one-row frame using the training column names so the scaler
    # sees the same feature names it was fitted with.
    user_data = pd.DataFrame([[age, smokes, air_quality, alcohol]], columns=feature_names)
    user_data_scaled = scaler.transform(user_data)

    prediction = model.predict(user_data_scaled)
    result = "Lung Cancer Detected" if prediction[0] == 1 else "No Lung Cancer Detected"
    print(f"\nPrediction: {result}")

# Run one interactive prediction; this prompts on standard input when the cell/script executes.
predict_lung_cancer()



Enter your details for lung cancer prediction:


Age:  23
Cigarettes per day:  2
Air Quality (1-10):  7
Alcohol consumption (1-10):  4



Prediction: No Lung Cancer Detected
