In [None]:
# Drug Addiction Risk Prediction Using Machine Learning

## Overview
#This project predicts the risk of drug addiction using behavioral and social factors.
#A synthetic dataset was generated to ensure ethical compliance and data privacy.

## Features Used
#- Age
#- Stress Level
#- Peer Pressure
#- Family History
#- Mental Health Score
#- Academic Pressure

## Machine Learning Models
#- Logistic Regression
#- Decision Tree Classifier

## Results
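#Both models reached an accuracy of 1.0 on the held-out test set. This is expected rather than
#impressive: the synthetic label is a deterministic rule over stress level, peer pressure, and
#family history, so the score shows the models recovered that rule, not that they would
#identify high-risk individuals in real-world data.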

## Ethical Considerations
#- No real personal data was used.
#- Dataset is fully synthetic.
#- Model is intended for educational and awareness purposes only.
#- Predictions should not be used for medical diagnosis.

## Technologies
#- Python
#- Scikit-learn
#- Pandas
#- NumPy

## Future Improvements
#- Include more behavioral factors
#- Use ensemble models
#- Deploy as a web application


In [None]:
import numpy as np
import pandas as pd

# Build a fully synthetic dataset: every feature is an independent random
# integer draw, so no real personal data is involved.
np.random.seed(42)  # fixed seed keeps the generated rows reproducible

n = 500  # number of synthetic rows

# (low, high) bounds handed to np.random.randint for each feature; the upper
# bound is exclusive. Dict order matters: features are drawn in this order,
# which determines the values produced from the seeded generator.
feature_bounds = {
    "age": (15, 45),
    "stress_level": (1, 10),
    "peer_pressure": (0, 2),
    "family_history": (0, 2),
    "mental_health_score": (1, 10),
    "academic_pressure": (1, 10),
}

data = {
    column: np.random.randint(low, high, n)
    for column, (low, high) in feature_bounds.items()
}

df = pd.DataFrame(data)

# A row is labelled at-risk (1) when at least one of these conditions holds;
# the label is therefore a deterministic function of three of the features.
high_stress = df["stress_level"] > 6
peer_influence = df["peer_pressure"] == 1
family_risk = df["family_history"] == 1

df["addiction_risk"] = (high_stress | peer_influence | family_risk).astype(int)

df.head()


Unnamed: 0,age,stress_level,peer_pressure,family_history,mental_health_score,academic_pressure,addiction_risk
0,21,7,1,0,6,5,1
1,34,3,0,1,3,2,1
2,43,2,1,1,1,9,1
3,29,9,0,0,8,9,1
4,25,8,0,0,5,1,1


In [None]:
# Persist the synthetic dataset as CSV in the working directory;
# index=False keeps the DataFrame's row index out of the file.
csv_path = "addiction_dataset.csv"
df.to_csv(csv_path, index=False)
print("Dataset saved!")


Dataset saved!


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train and evaluate two classifiers on the synthetic dataset.
RANDOM_STATE = 42  # one seed for the split and the tree, so re-runs match

# Every column except the label is used as a feature.
X = df.drop("addiction_risk", axis=1)
y = df["addiction_risk"]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so the test set does not influence preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# NOTE: addiction_risk is built earlier in this notebook as a deterministic
# rule over stress_level, peer_pressure and family_history. Near-perfect scores
# below therefore mean the models recovered that rule; they say nothing about
# performance on real-world data.

# Logistic Regression
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_pred))
print(classification_report(y_test, lr_pred))

# Decision Tree
# random_state pins the tree's internal feature shuffling; without it the
# fitted tree (and the feature importances read from it) can differ between
# runs even though the data and split are fixed.
dt = DecisionTreeClassifier(max_depth=5, random_state=RANDOM_STATE)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)

print("Decision Tree Accuracy:", accuracy_score(y_test, dt_pred))
print(classification_report(y_test, dt_pred))


Logistic Regression Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        21
           1       1.00      1.00      1.00        79

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100

Decision Tree Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        21
           1       1.00      1.00      1.00        79

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100



In [None]:
import pandas as pd

# Pair each feature name with the importance the fitted decision tree
# assigned to it, then rank from most to least important.
feature_scores = pd.Series(data=dt.feature_importances_, index=X.columns)
importance = feature_scores.sort_values(ascending=False)

importance


Unnamed: 0,0
peer_pressure,0.424805
stress_level,0.324857
family_history,0.250338
age,0.0
mental_health_score,0.0
academic_pressure,0.0
