<a href="https://colab.research.google.com/github/SondiSohan/AI---SHOPPING-ASSISTANT-ChicChoice-AI-/blob/main/ChicChoice_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#synthetic dataset, including the generation of purchase data and saving it to a CSV file

In [3]:
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Parameters for dataset
num_users = 10000
num_items = 500
num_interactions = 100000

# Generate user data
user_ids = np.random.choice(range(1, num_users + 1), num_interactions)
ages = np.random.randint(18, 70, num_interactions)
genders = np.random.choice(['Male', 'Female'], num_interactions)

# Generate browsing and purchase data
browsing_history = np.random.randint(1, 100, num_interactions)
previous_purchases = np.random.randint(0, 50, num_interactions)

# Generate item data
item_ids = np.random.choice(range(1, num_items + 1), num_interactions)
item_categories = np.random.choice(['Tops', 'Bottoms', 'Dresses', 'Outerwear', 'Accessories'], num_interactions)
item_prices = np.round(np.random.uniform(10, 500, num_interactions), 2)
item_colors = np.random.choice(['Red', 'Blue', 'Green', 'Black', 'White'], num_interactions)
item_sizes = np.random.choice(['XS', 'S', 'M', 'L', 'XL'], num_interactions)

# Generate purchase data
purchase = np.random.choice([0, 1], num_interactions, p=[0.8, 0.2])  # Assuming 20% purchase rate

# Create DataFrame
data = pd.DataFrame({
    'user_id': user_ids,
    'age': ages,
    'gender': genders,
    'browsing_history': browsing_history,
    'previous_purchases': previous_purchases,
    'item_id': item_ids,
    'item_category': item_categories,
    'item_price': item_prices,
    'item_color': item_colors,
    'item_size': item_sizes,
    'purchase': purchase
})

# Save to CSV
data.to_csv('ai_shopping_assistant_data.csv', index=False)

print("Dataset generated and saved as 'ai_shopping_assistant_data.csv'")


Dataset generated and saved as 'ai_shopping_assistant_data.csv'


# Steps Involved:

---
*  Data Collection: Collect user interaction data including demographics, browsing history, previous purchases, and item details.
* Data Preprocessing: Clean and preprocess the data to make it suitable for training machine learning models.
* Model Training: Train a machine learning model to predict user behavior, such as whether they will make a purchase.
* Model Evaluation: Evaluate the performance of the trained model to ensure it provides accurate predictions.
* Integration: Integrate the trained model into the shopping assistant application to provide personalized recommendations and improve user experience.



# Here’s how the code fits into an AI shopping assistant workflow:


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Function to preprocess data
def preprocess_data(data):
    # Identify categorical and numerical columns
    categorical_features = ['gender', 'item_category', 'item_color', 'item_size']
    numerical_features = ['age', 'browsing_history', 'previous_purchases', 'item_price']

    # Preprocessing for numerical data
    numerical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])

    # Preprocessing for categorical data
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    # Bundle preprocessing for numerical and categorical data
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)
        ])

    # Preprocess data
    data_preprocessed = preprocessor.fit_transform(data)

    return data_preprocessed, preprocessor

# Load data
data = pd.read_csv('ai_shopping_assistant_data.csv')

# Ensure the target variable is binary
if not all(data['purchase'].isin([0, 1])):
    raise ValueError("Target variable 'purchase' contains non-binary values")

# Preprocess data
X = data.drop('purchase', axis=1)
y = data['purchase']

# Apply preprocessing
X_preprocessed, preprocessor = preprocess_data(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
accuracy = model.score(X_test, y_test)
print(f'Accuracy: {accuracy}')

# Predicting purchase for a new user interaction
new_interaction = pd.DataFrame({
    'age': [25],
    'gender': ['Female'],
    'browsing_history': [30],
    'previous_purchases': [5],
    'item_category': ['Tops'],
    'item_price': [50.00],
    'item_color': ['Red'],
    'item_size': ['M']
})

# Preprocess the new interaction data
new_interaction_preprocessed = preprocessor.transform(new_interaction)

# Predict purchase probability
purchase_prediction = model.predict(new_interaction_preprocessed)
purchase_probability = model.predict_proba(new_interaction_preprocessed)[:, 1]

print(f'Purchase Prediction: {purchase_prediction[0]}')
print(f'Purchase Probability: {purchase_probability[0]}')


Accuracy: 0.7968
Purchase Prediction: 0
Purchase Probability: 0.22
