# Computer Vision & Food Recognition

Found 25216 images belonging to 256 classes.
Found 6179 images belonging to 256 classes.


  saving_api.save_model(


In [2]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

class FoodRecognitionModel:
    """Food-image classifier with sodium lookup and stimulation-level helpers.

    Instance attributes (all initialised to ``None``):
        food_recognition_model: Keras model set by
            ``train_food_recognition_model``.
        class_indices: label -> output-index mapping captured from the
            training generator; ``identify_food`` uses it to name predictions.
        sodium_prediction_model: optional estimator exposing ``predict``.
        user_behavior_model: optional estimator read by functions that are
            attached to this class elsewhere in the file.
        food_database: optional table with ``food_name`` and
            ``sodium_content`` columns (indexed like a pandas DataFrame).
    """

    # Input resolution and batch size shared by training and inference.
    IMAGE_SIZE = (224, 224)
    BATCH_SIZE = 32

    def __init__(self):
        # Every attribute the methods read is created here so that a fresh
        # instance answers "not available" instead of raising AttributeError.
        self.food_recognition_model = None
        self.class_indices = None
        self.sodium_prediction_model = None
        self.user_behavior_model = None
        self.food_database = None

    def create_food_recognition_model(self, num_classes):
        """Build and compile the CNN used for food recognition.

        Args:
            num_classes: Size of the softmax output layer.

        Returns:
            A compiled Keras ``Sequential`` model.
        """
        model = Sequential([
            Conv2D(32, (3, 3), activation='relu', input_shape=self.IMAGE_SIZE + (3,)),
            MaxPooling2D(2, 2),
            Conv2D(64, (3, 3), activation='relu'),
            MaxPooling2D(2, 2),
            Conv2D(128, (3, 3), activation='relu'),
            MaxPooling2D(2, 2),
            Flatten(),
            Dense(512, activation='relu'),
            Dropout(0.5),
            Dense(num_classes, activation='softmax')
        ])
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    def train_food_recognition_model(self, train_data_path, validation_data_path=None, num_classes=101, epochs=20):
        """Train the food recognition model and save it plus a history plot.

        Args:
            train_data_path: Directory containing one sub-folder per class.
            validation_data_path: Optional directory with the same class
                sub-folders. When omitted, 20% of ``train_data_path`` is held
                out for validation.
            num_classes: Expected number of classes. The number of class
                folders actually found takes precedence, because a mismatch
                would make the softmax layer incompatible with the labels.
            epochs: Number of training epochs.

        Returns:
            Tuple ``(model, history)``.

        Raises:
            ValueError: If ``train_data_path`` is empty or holds no images.
        """
        if not train_data_path:
            raise ValueError("train_data_path must point to a directory of class sub-folders")

        # Create output folders if not present
        os.makedirs('models', exist_ok=True)
        os.makedirs('reports', exist_ok=True)

        use_split = validation_data_path is None
        split_fraction = 0.2 if use_split else 0.0

        # Augmentation is applied to training images only.
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=split_fraction,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest'
        )
        # Validation images are only rescaled so the metric reflects real
        # inputs. Using the same validation_split keeps the two subsets
        # complementary.
        validation_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=split_fraction
        )

        train_generator = train_datagen.flow_from_directory(
            train_data_path,
            target_size=self.IMAGE_SIZE,
            batch_size=self.BATCH_SIZE,
            class_mode='categorical',
            subset='training' if use_split else None
        )
        if train_generator.samples == 0:
            raise ValueError(f"No training images found under {train_data_path!r}")

        validation_generator = validation_datagen.flow_from_directory(
            train_data_path if use_split else validation_data_path,
            target_size=self.IMAGE_SIZE,
            batch_size=self.BATCH_SIZE,
            class_mode='categorical',
            # Reuse the training label order so indices agree across both sets.
            classes=list(train_generator.class_indices),
            shuffle=False,
            subset='validation' if use_split else None
        )

        detected_classes = train_generator.num_classes
        if detected_classes != num_classes:
            print(
                f"num_classes={num_classes} does not match the {detected_classes} "
                f"class folders found; using {detected_classes}"
            )
        model = self.create_food_recognition_model(detected_classes)

        # Step counts are left to Keras (one pass over each generator), which
        # also covers datasets smaller than one batch.
        history = model.fit(
            train_generator,
            epochs=epochs,
            validation_data=validation_generator if validation_generator.samples else None
        )

        # Save the model and remember how output indices map to labels
        model.save('models/food_recognition_model.h5')
        self.food_recognition_model = model
        self.class_indices = dict(train_generator.class_indices)

        # Plot training history
        plt.figure(figsize=(12, 4))
        panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
        for position, (metric, label) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            plt.plot(history.history[metric], label=f'Training {label}')
            if f'val_{metric}' in history.history:
                plt.plot(history.history[f'val_{metric}'], label=f'Validation {label}')
            plt.title(f'Model {label}')
            plt.xlabel('Epoch')
            plt.ylabel(label)
            plt.legend()

        plt.tight_layout()
        plt.savefig('reports/food_recognition_training.png')

        return model, history

    def identify_food(self, image_path):
        """Classify the food shown in an image file.

        Args:
            image_path: Path of the image to classify.

        Returns:
            ``{'food_type': label, 'confidence': float}``, or ``None`` when no
            recognition model is loaded. If no label mapping is known, the
            predicted class index is returned as a string in ``food_type``.
        """
        if self.food_recognition_model is None:
            print("Food recognition model not loaded")
            return None

        # Imported here because the file does not import these names before
        # the class is defined.
        import numpy as np
        from tensorflow.keras.preprocessing import image

        img = image.load_img(image_path, target_size=self.IMAGE_SIZE)
        batch = np.expand_dims(image.img_to_array(img), axis=0) / 255.0

        prediction = self.food_recognition_model.predict(batch)
        best_index = int(np.argmax(prediction))

        # Keras models do not carry class_indices; prefer the mapping stored at
        # training time and only fall back to an attribute someone attached to
        # the model object.
        label_to_index = (
            self.class_indices
            or getattr(self.food_recognition_model, 'class_indices', None)
            or {}
        )
        index_to_label = {index: label for label, index in label_to_index.items()}

        return {
            'food_type': index_to_label.get(best_index, str(best_index)),
            'confidence': float(np.max(prediction))
        }

    def predict_sodium_content(self, food_type):
        """Look up or predict the sodium content of a food type.

        The food database is consulted first; if the food is not listed and a
        sodium prediction model is available, the model is used instead.

        Args:
            food_type: Food name as stored in the database's ``food_name``
                column.

        Returns:
            The sodium content value, or ``None`` when neither source can
            provide one.
        """
        if self.food_database is not None:
            # Look up sodium content in database
            matches = self.food_database[self.food_database['food_name'] == food_type]['sodium_content']
            if len(matches) > 0:
                return matches.values[0]

        if self.sodium_prediction_model is not None:
            # Fall back to the ML model, fed with (placeholder) food features
            features = self._extract_food_features(food_type)
            return self.sodium_prediction_model.predict([features])[0]

        # Default fallback
        return None

    def _extract_food_features(self, food_type):
        """Return the feature vector used for sodium prediction.

        This is a placeholder: the same fixed vector is returned for every
        food type.
        """
        return [0.5, 0.5, 0.5]  # Placeholder features

    def recommend_stimulation_level(self, food_type, user_preference):
        """Recommend an electrical stimulation level for a food and user.

        Args:
            food_type: Food name passed to ``predict_sodium_content``.
            user_preference: Saltiness preference; 0.5 leaves the base level
                unchanged, higher values raise it and lower values reduce it.

        Returns:
            A level clamped to the range 1-10, or 5 when the sodium content is
            unknown.
        """
        sodium_content = self.predict_sodium_content(food_type)

        if sodium_content is None:
            # Default recommendation
            return 5  # Mid-level stimulation

        # Higher sodium content foods need less stimulation; the deduction is
        # capped at 9 so the base level never drops below 1.
        base_level = 10 - min(sodium_content / 100, 9)

        # Scale by the user's preference (factor 0.5x to 1.5x for 0..1)
        adjusted_level = base_level * (1 + (user_preference - 0.5))

        # Ensure the level is between 1-10
        return max(1, min(10, adjusted_level))
    


# Data Collection & Analysis for Market Research

In [3]:
def analyze_survey_data(self, survey_results_path):
    """Analyze survey data for market research and write charts plus a report.

    Args:
        survey_results_path: CSV file with the survey responses. A falsy value
            falls back to ``'survey_data.csv'`` in the working directory.

    Returns:
        The report dictionary that is also written to
        ``reports/market_research_report.json``.

    Raises:
        ValueError: If the survey file contains no responses.
        KeyError: If a column needed for the analysis is missing.
    """
    # The charts below are saved into reports/, so it must exist up front.
    os.makedirs('reports', exist_ok=True)

    survey_data = pd.read_csv(survey_results_path or 'survey_data.csv')
    if survey_data.empty:
        raise ValueError("Survey file contains no responses")

    # (question column, derived column, answer -> score, score for other answers)
    derived_columns = [
        ('Do you follow a low-sodium diet?',
         'sodium_restriction', {'Yes': 1, 'No': 0}, 0),
        ('How often do you eat at restaurants?',
         'restaurant_frequency', {'Daily': 4, 'Weekly': 3, 'Monthly': 2, 'Rarely': 1}, 0),
        ('Are you aware of technologies that enhance taste perception without adding sodium?',
         'technology_comfort', {'Yes': 1, 'No': 0}, 0),
        ('Would you consider purchasing such a device if it improves taste satisfaction without adding sodium?',
         'purchase_interest', {'Yes': 2, 'Maybe': 1, 'No': 0}, 0),
        ('How important is taste enhancement in your dining experience?',
         'taste_importance', {'Very Important': 3, 'Moderately Important': 2, 'Not Important': 1}, 1),
        ('Do you find the current salt content in these dishes',
         'salt_preference', {'Too Salty': 2, 'Perfect': 1, 'Too Bland': 0}, 1),
        ('Would you be interested in trying a device that enhances salty and umami flavors using electric stimulation?',
         'device_interest', {'Yes': 1, 'No': 0}, 0),
    ]
    for question, derived, mapping, default in derived_columns:
        if question in survey_data.columns:
            survey_data[derived] = survey_data[question].map(mapping).fillna(default)

    medical_column = 'If yes, is it due to a medical condition? ( Type no incase of no)'

    # Fail early with one clear message instead of an obscure KeyError later.
    required = [
        'Age', medical_column, 'sodium_restriction', 'technology_comfort',
        'purchase_interest', 'device_interest', 'taste_importance'
    ]
    missing = [column for column in required if column not in survey_data.columns]
    if missing:
        raise KeyError(f"Survey data is missing required columns (or their source questions): {missing}")

    # Convert age to age groups; non-numeric ages become NaN and are left out
    survey_data['Age'] = pd.to_numeric(survey_data['Age'], errors='coerce')
    survey_data['age_group'] = pd.cut(
        survey_data['Age'],
        bins=[0, 25, 40, 60, 100],
        labels=['18-25', '26-40', '41-60', '60+']
    )

    # Price sensitivity is a proxy derived from purchase interest
    survey_data['price_sensitivity'] = 3 - (survey_data['purchase_interest'] / 2)  # Higher value means more sensitive

    # Basic analysis by age group (observed=False keeps empty age groups)
    demographic_summary = survey_data.groupby('age_group', observed=False).agg({
        'sodium_restriction': 'mean',
        'technology_comfort': 'mean',
        'price_sensitivity': 'mean',
        'device_interest': 'mean',
        'purchase_interest': 'mean',
        'taste_importance': 'mean'
    })

    plt.figure(figsize=(12, 8))
    sns.heatmap(demographic_summary, annot=True, cmap='Blues', fmt='.2f')
    plt.title('User Characteristics by Age Group')
    plt.savefig('reports/demographic_analysis.png')

    # Medical condition analysis. Respondents are asked to type "no", so the
    # comparison ignores case and surrounding spaces; blank answers are not
    # counted as a condition.
    medical_answers = survey_data[medical_column].dropna().astype(str).str.strip()
    reported_conditions = medical_answers[(medical_answers != '') & (medical_answers.str.lower() != 'no')]
    medical_conditions = reported_conditions.value_counts().sort_index()

    if not medical_conditions.empty:  # plotting an empty series raises
        plt.figure(figsize=(10, 6))
        medical_conditions.plot(kind='bar')
        plt.title('Distribution of Medical Conditions Among Low-Sodium Diet Followers')
        plt.ylabel('Count')
        plt.savefig('reports/medical_conditions.png')

    # Potential market size calculation
    total_respondents = len(survey_data)
    interested_users = int((survey_data['device_interest'] == 1).sum())
    market_penetration = interested_users / total_respondents

    print(f"Potential market penetration: {market_penetration:.2%}")

    # Device feature preferences (multi-select answers separated by ';')
    feature_column = 'What features would you expect in a taste-enhancement device? (Select all that apply)'
    feature_preferences = {}

    if feature_column in survey_data.columns:
        for response in survey_data[feature_column].dropna().astype(str):
            for feature in response.split(';'):
                feature = feature.strip()
                if feature:
                    feature_preferences[feature] = feature_preferences.get(feature, 0) + 1

    if feature_preferences:
        plt.figure(figsize=(12, 6))
        features_df = pd.DataFrame.from_dict(feature_preferences, orient='index', columns=['Count'])
        # DataFrame.plot opens its own figure unless it is handed an axis.
        features_df.sort_values(by='Count', ascending=False).plot(kind='bar', ax=plt.gca())
        plt.title('Preferred Features for Taste Enhancement Device')
        plt.ylabel('Count')
        plt.savefig('reports/feature_preferences.png')

    # Correlations between key factors
    correlations = survey_data[
        ['sodium_restriction', 'technology_comfort', 'price_sensitivity',
         'purchase_interest', 'device_interest', 'Age']
    ].corr()

    plt.figure(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', fmt='.2f')
    plt.title('Correlations Between User Characteristics')
    plt.savefig('reports/correlations.png')

    # Salt preferences across different dishes
    salt_cols = [
        'Dal / Gojju / Palya', 'Sambar / Rasam / Curd',
        'Biryani / Pulao / Rice bath', 'Curries (Vegetable/Chicken/Mutton)',
        'Dosa/Idly/Chaat/Snacks', 'Dosa/Idly/Roti/Paratha/Chapathi',
        'Pickles/Papad'
    ]

    # Convert salt measures to numeric values
    salt_map = {'No Salt': 0, '1/4 tsp': 0.25, '1/2 tsp': 0.5, '1 tsp': 1, 'More than 1 tsp': 1.5}
    numeric_salt_cols = []
    for col in salt_cols:
        if col in survey_data.columns:
            survey_data[f"{col}_numeric"] = survey_data[col].map(salt_map).fillna(0)
            numeric_salt_cols.append(f"{col}_numeric")

    if numeric_salt_cols:
        plt.figure(figsize=(14, 8))
        survey_data[numeric_salt_cols].mean().sort_values().plot(kind='bar')
        plt.title('Average Salt Preference Across Different Dishes')
        plt.ylabel('Average Salt Amount (tsp)')
        plt.savefig('reports/salt_preferences.png')

    # Generate comprehensive report
    report = {
        'demographic_summary': demographic_summary.to_dict(),
        'market_penetration': market_penetration,
        'correlations': correlations.to_dict(),
        'feature_preferences': feature_preferences,
        'insights': {
            # nlargest already returns the age-group labels as its index.
            'top_age_groups': demographic_summary['purchase_interest'].nlargest(2).index.tolist(),
            'price_sensitivity_correlation': float(correlations.loc['price_sensitivity', 'purchase_interest']),
            'medical_condition_prevalence': len(reported_conditions) / total_respondents,
            'device_interest_by_sodium_restriction': survey_data.groupby('sodium_restriction')['device_interest'].mean().to_dict()
        }
    }

    with open('reports/market_research_report.json', 'w') as f:
        json.dump(report, f, indent=4)

    return report

# def analyze_usage_patterns(self, usage_logs_path):
#     """Analyze usage patterns from logs"""
#     # Note: This function is kept as is since you mentioned to keep
#     # any functions that don't need to be modified. If you have usage logs
#     # in the future, this function will be useful.
    
#     # Load usage logs
#     usage_data = pd.read_csv(usage_logs_path)
    
#     # Time series analysis
#     usage_data['date'] = pd.to_datetime(usage_data['timestamp'])
#     daily_usage = usage_data.groupby(usage_data['date'].dt.date).agg({
#         'user_id': 'nunique',
#         'session_duration': 'mean',
#         'taste_intensity_level': 'mean'
#     })
    
#     # Plot trends
#     plt.figure(figsize=(14, 7))
#     plt.subplot(2, 1, 1)
#     plt.plot(daily_usage.index, daily_usage['user_id'], marker='o')
#     plt.title('Daily Active Users')
    
#     plt.subplot(2, 1, 2)
#     plt.plot(daily_usage.index, daily_usage['taste_intensity_level'], marker='o', color='green')
#     plt.title('Average Taste Intensity Settings')
    
#     plt.tight_layout()
#     plt.savefig('reports/usage_trends.png')
    
#     # Identify patterns
#     # User segments based on usage patterns
#     usage_data['usage_frequency'] = usage_data.groupby('user_id')['user_id'].transform('count')
#     usage_data['avg_session_duration'] = usage_data.groupby('user_id')['session_duration'].transform('mean')
    
#     user_segments = usage_data.groupby('user_id').agg({
#         'usage_frequency': 'first',
#         'avg_session_duration': 'first',
#         'taste_intensity_level': 'mean'
#     })
    
#     # K-means clustering to identify user segments
#     from sklearn.cluster import KMeans
#     from sklearn.preprocessing import StandardScaler
    
#     scaler = StandardScaler()
#     scaled_features = scaler.fit_transform(user_segments)
    
#     kmeans = KMeans(n_clusters=3, random_state=42)
#     user_segments['cluster'] = kmeans.fit_predict(scaled_features)
    
#     # Visualize clusters
#     plt.figure(figsize=(10, 8))
#     sns.scatterplot(
#         x='usage_frequency', 
#         y='avg_session_duration', 
#         hue='cluster', 
#         size='taste_intensity_level',
#         sizes=(50, 200),
#         data=user_segments
#     )
#     plt.title('User Segments Based on Usage Patterns')
#     plt.savefig('reports/user_segments.png')
    
#     # Generate insights
#     cluster_profiles = user_segments.groupby('cluster').mean()
    
#     return {
#         'daily_usage': daily_usage.to_dict(),
#         'user_segments': cluster_profiles.to_dict(),
#         'total_users': usage_data['user_id'].nunique(),
#         'total_sessions': len(usage_data)
#     }

# Expose the module-level function as a method of FoodRecognitionModel.
setattr(FoodRecognitionModel, analyze_survey_data.__name__, analyze_survey_data)
# FoodRecognitionModel.analyze_usage_patterns = analyze_usage_patterns

# Sentiment Analysis & User Behavior Prediction

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
import nltk
from sklearn.ensemble import RandomForestClassifier
import joblib

In [5]:
def analyze_user_feedback(self, feedback_data_path):
    """Analyze free-text survey feedback: sentiment, themes and concerns.

    Args:
        feedback_data_path: CSV file with the survey responses. A falsy value
            falls back to ``'survey_data.csv'`` in the working directory.

    Returns:
        The report dictionary that is also written to
        ``reports/sentiment_analysis_report.json``.
    """
    # Imported unconditionally: a conditional import would make ``nltk`` an
    # unbound local on calls where the analyzer already exists.
    import nltk

    # The charts below are saved into reports/, so it must exist up front.
    os.makedirs('reports', exist_ok=True)

    survey_data = pd.read_csv(feedback_data_path or 'survey_data.csv')

    concern_column = 'Do you have any concerns regarding using technology for taste enhancement?'
    feedback_columns = [
        concern_column,
        'Any suggestions or feedback regarding low-sodium dining options?'
    ]

    # Combine feedback from the different free-text columns
    frames = []
    for col in feedback_columns:
        if col in survey_data.columns:
            valid_feedback = _clean_text_responses(survey_data[col])
            if not valid_feedback.empty:
                frames.append(pd.DataFrame({
                    'feedback_text': valid_feedback,
                    'feedback_type': col
                }))

    if frames:
        feedback_data = pd.concat(frames, ignore_index=True)
    else:
        # No usable feedback: analyze a single neutral placeholder row so the
        # rest of the pipeline still runs.
        feedback_data = pd.DataFrame({
            'feedback_text': ['No feedback available'],
            'feedback_type': ['None']
        })

    # Initialize sentiment analyzer if not already done
    if not hasattr(self, 'sentiment_analyzer'):
        from nltk.sentiment.vader import SentimentIntensityAnalyzer
        nltk.download('vader_lexicon', quiet=True)
        self.sentiment_analyzer = SentimentIntensityAnalyzer()

    # Basic sentiment analysis (VADER compound score)
    feedback_data['sentiment'] = feedback_data['feedback_text'].apply(
        lambda text: self.sentiment_analyzer.polarity_scores(text)['compound']
    )

    # Categorize sentiment; include_lowest keeps a score of exactly -1
    feedback_data['sentiment_category'] = pd.cut(
        feedback_data['sentiment'],
        bins=[-1, -0.25, 0.25, 1],
        labels=['Negative', 'Neutral', 'Positive'],
        include_lowest=True
    )

    sentiment_counts = feedback_data['sentiment_category'].value_counts()

    plt.figure(figsize=(10, 6))
    sns.countplot(x='sentiment_category', data=feedback_data, palette='RdYlGn')
    plt.title('Sentiment Distribution in User Feedback')
    plt.savefig('reports/sentiment_distribution.png')

    # Extract common themes. Downloads are best effort: the data may already
    # be installed even when a download fails, so availability is decided by
    # whether tokenization actually works.
    # NOTE(review): newer NLTK releases look up 'punkt_tab' rather than
    # 'punkt' - confirm against the installed version.
    positive_themes, negative_themes = [], []
    for resource in ('punkt', 'punkt_tab', 'stopwords'):
        nltk.download(resource, quiet=True)
    try:
        from nltk.corpus import stopwords

        stop_words = set(stopwords.words('english'))
        is_positive = feedback_data['sentiment_category'] == 'Positive'
        is_negative = feedback_data['sentiment_category'] == 'Negative'
        positive_themes = _top_keywords(feedback_data.loc[is_positive, 'feedback_text'], stop_words)
        negative_themes = _top_keywords(feedback_data.loc[is_negative, 'feedback_text'], stop_words)
    except LookupError:
        print("NLTK tokenizer/stopword data not available. Skipping theme extraction.")

    # Visualize themes when at least one side has any
    if positive_themes or negative_themes:
        plt.figure(figsize=(12, 10))

        if positive_themes:
            plt.subplot(2, 1, 1)
            sns.barplot(x=[item[1] for item in positive_themes],
                        y=[item[0] for item in positive_themes],
                        palette='Greens_r')
            plt.title('Common Themes in Positive Feedback')
            plt.xlabel('Frequency')

        if negative_themes:
            plt.subplot(2, 1, 2)
            sns.barplot(x=[item[1] for item in negative_themes],
                        y=[item[0] for item in negative_themes],
                        palette='Reds_r')
            plt.title('Common Themes in Negative Feedback')
            plt.xlabel('Frequency')

        plt.tight_layout()
        plt.savefig('reports/feedback_themes.png')

    # Analyze concerns specifically
    if concern_column in survey_data.columns:
        valid_concerns = _clean_text_responses(survey_data[concern_column])
    else:
        valid_concerns = pd.Series(dtype=object)

    if not valid_concerns.empty:
        # Word cloud for concerns (optional dependency)
        try:
            from wordcloud import WordCloud

            concerns_text = ' '.join(valid_concerns)

            wordcloud = WordCloud(width=800, height=400,
                                  background_color='white',
                                  max_words=100).generate(concerns_text)

            plt.figure(figsize=(10, 5))
            plt.imshow(wordcloud, interpolation='bilinear')
            plt.axis('off')
            plt.title('Concerns About Taste Enhancement Technology')
            plt.savefig('reports/concerns_wordcloud.png')
        except ImportError:
            print("WordCloud package not available. Skipping word cloud visualization.")

    # Generate report
    report = {
        'sentiment_distribution': sentiment_counts.to_dict(),
        'average_sentiment': float(feedback_data['sentiment'].mean()),
        'positive_themes': dict(positive_themes),
        'negative_themes': dict(negative_themes),
        'concerns_count': len(valid_concerns)
    }

    with open('reports/sentiment_analysis_report.json', 'w') as f:
        json.dump(report, f, indent=4)

    return report


def _clean_text_responses(responses):
    """Return stripped free-text answers, dropping blanks and bare "no" replies."""
    cleaned = responses.dropna().astype(str).str.strip()
    return cleaned[(cleaned != '') & (cleaned.str.lower() != 'no')]


def _top_keywords(texts, stop_words, limit=10):
    """Return the ``limit`` most common alphabetic non-stopword tokens in ``texts``."""
    from collections import Counter
    from nltk.tokenize import word_tokenize

    counts = Counter()
    for text in texts:
        counts.update(
            word for word in word_tokenize(text.lower())
            if word.isalpha() and word not in stop_words
        )
    return counts.most_common(limit)

def train_user_behavior_model(self, user_data_path):
    """Train a classifier predicting purchase intent from survey answers.

    The target ``continued_usage`` is 1 for respondents who answered "Yes" or
    "Maybe" to the purchase question and 0 otherwise.

    Args:
        user_data_path: CSV file with the survey responses. A falsy value
            falls back to ``'survey_data.csv'`` in the working directory.

    Returns:
        Tuple ``(model, feature_importance)``, or ``None`` when the data does
        not contain enough features or rows to train on.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split
    import joblib

    # Charts and the model are written below, so both folders must exist.
    os.makedirs('models', exist_ok=True)
    os.makedirs('reports', exist_ok=True)

    survey_data = pd.read_csv(user_data_path or 'survey_data.csv')

    # (question column, derived feature, answer -> value, value for other answers)
    binary_map = {'Yes': 1, 'No': 0}
    derived_features = [
        ('Do you follow a low-sodium diet?',
         'sodium_restriction_level', binary_map, 0),
        ('Are you aware of technologies that enhance taste perception without adding sodium?',
         'technology_comfort', binary_map, 0),
        ('Do you find the current salt content in these dishes',
         'taste_sensitivity', {'Too Salty': 0, 'Perfect': 0.5, 'Too Bland': 1}, 0.5),
        ('Have you ever added salt or condiments to enhance taste in low-sodium dishes?',
         'previous_adjustments', {'Always': 1, 'Sometimes': 0.5, 'Never': 0}, 0),
        # Restaurant frequency stands in for average usage time
        ('How often do you eat at restaurants?',
         'average_usage_time', {'Daily': 30, 'Weekly': 20, 'Monthly': 10, 'Rarely': 5}, 5),
    ]
    for question, derived, mapping, default in derived_features:
        if question in survey_data.columns:
            survey_data[derived] = survey_data[question].map(mapping).fillna(default)

    # Gender as a binary feature (answers are stripped before mapping)
    if 'Gender' in survey_data.columns:
        survey_data['gender'] = (
            survey_data['Gender'].astype(str).str.strip().map({'Male': 1, 'Female': 0}).fillna(0)
        )

    # Target: purchase interest, with "Maybe" counted as positive
    purchase_question = 'Would you consider purchasing such a device if it improves taste satisfaction without adding sodium?'
    if purchase_question in survey_data.columns:
        purchase_score = survey_data[purchase_question].map({'Yes': 1, 'Maybe': 0.5, 'No': 0})
        # Unmapped/missing answers are NaN, which compares False and becomes 0.
        survey_data['continued_usage'] = (purchase_score >= 0.5).astype(int)

    if 'Age' in survey_data.columns:
        survey_data['Age'] = pd.to_numeric(survey_data['Age'], errors='coerce')

    features = [
        'Age', 'gender', 'sodium_restriction_level',
        'technology_comfort', 'average_usage_time',
        'taste_sensitivity', 'previous_adjustments'
    ]

    # Keep only the features that could be derived from this file
    available_features = []
    for feature in features:
        if feature in survey_data.columns:
            available_features.append(feature)
        else:
            print(f"Warning: Feature '{feature}' not found in data")

    if not available_features or 'continued_usage' not in survey_data.columns:
        print("Not enough features available for modeling")
        return None

    if len(survey_data) < 5:
        # An 80/20 split needs at least a handful of rows.
        print("Not enough survey responses available for modeling")
        return None

    X = survey_data[available_features]
    y = survey_data['continued_usage']  # Target: whether they would purchase the device

    # Fill gaps with the column mean; a column with no values at all becomes 0
    X = X.fillna(X.mean()).fillna(0)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))

    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig('reports/confusion_matrix.png')

    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': available_features,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    plt.figure(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance)
    plt.title('Features Affecting Purchase Decision')
    plt.savefig('reports/feature_importance.png')

    # Persist the model. scikit-learn estimators have no .save(), and this
    # model must not replace the food recognition model on the instance.
    joblib.dump(model, 'models/user_behavior_model.pkl')
    self.user_behavior_model = model

    return model, feature_importance

def predict_user_retention(self, user_data):
    """Predict whether a user will continue using the device.

    Args:
        user_data: Feature values for one user, either as a dict (converted to
            a one-row DataFrame) or as something the model's ``predict``
            accepts directly.

    Returns:
        ``{'will_continue': bool, 'probability': float}`` where
        ``probability`` is the model's probability for the predicted class, or
        ``None`` when no model is loaded or prediction fails.
    """
    # getattr: the attribute may never have been set on this instance.
    model = getattr(self, 'user_behavior_model', None)
    if model is None:
        print("User behavior model not loaded")
        return None

    # Ensure data is in the right format
    if isinstance(user_data, dict):
        user_data = pd.DataFrame([user_data])

    # Deliberately best effort: any failure is reported and yields None.
    try:
        predicted = model.predict(user_data)[0]
        class_probabilities = model.predict_proba(user_data)[0]

        # predict_proba columns follow classes_, which may hold one class only.
        classes = getattr(model, 'classes_', None)
        if classes is not None:
            column = list(classes).index(predicted)
        else:
            column = 1 if predicted else 0

        return {
            'will_continue': bool(predicted),
            'probability': float(class_probabilities[column])
        }
    except Exception as e:
        print(f"Error predicting user retention: {e}")
        return None

# System Integration
def process_food_image(self, image_path, user_id):
    """Run the full pipeline for one food photo.

    The image is classified, the user's preferences are fetched, the sodium
    content is estimated, a stimulation level is recommended and the
    interaction is logged.

    Returns:
        A dict with the food type, confidence, sodium content and recommended
        stimulation level, or ``{"error": ...}`` when the food could not be
        identified.
    """
    recognised = self.identify_food(image_path)
    if recognised is None:
        return {"error": "Could not identify food"}

    food_type = recognised['food_type']

    preferences = self.get_user_preferences(user_id)
    sodium = self.predict_sodium_content(food_type)

    # Fall back to a neutral preference when none is stored for the user.
    saltiness = preferences.get('saltiness_preference', 0.5)
    level = self.recommend_stimulation_level(food_type, saltiness)

    self.log_interaction(user_id, food_type, level)

    summary = {"food_type": food_type}
    summary["confidence"] = recognised['confidence']
    summary["sodium_content"] = sodium
    summary["recommended_stimulation_level"] = level
    return summary

def get_user_preferences(self, user_id):
    """Return the stored preferences for a user.

    Placeholder implementation: no database is queried and ``user_id`` is not
    used; the same defaults are returned for everyone.
    """
    defaults = {}
    defaults["saltiness_preference"] = 0.5  # Scale from 0-1
    defaults["previous_adjustments"] = []
    return defaults

def log_interaction(self, user_id, food_type, stimulation_level):
    """Record one user interaction for later analysis.

    Nothing is persisted yet: the record (user id, current timestamp, food
    type and stimulation level) is only printed to stdout. Returns ``None``.
    """
    entry = dict(
        user_id=user_id,
        timestamp=pd.Timestamp.now(),
        food_type=food_type,
        stimulation_level=stimulation_level,
    )

    # Stand-in for a real database write.
    print(f"Logged interaction: {entry}")

def generate_comprehensive_report(self):
    """Write a JSON status report for every data science component.

    The report states, for each model and for the food database, whether it
    is currently loaded on this instance, and is saved to
    ``reports/comprehensive_report.json`` (the ``reports`` directory is
    created when missing).

    NOTE: the ``accuracy`` figures are hard-coded placeholders, not
    measurements taken from the models.

    Returns:
        The report as a nested dict, identical to what was written to disk.
    """
    os.makedirs('reports', exist_ok=True)

    # The model/database attributes are only assigned once their artifacts
    # have been loaded, so they may not exist at all on a fresh instance.
    # Treat "never set" the same as "set to None" instead of raising
    # AttributeError.
    food_recognition_model = getattr(self, 'food_recognition_model', None)
    sodium_prediction_model = getattr(self, 'sodium_prediction_model', None)
    user_behavior_model = getattr(self, 'user_behavior_model', None)
    food_database = getattr(self, 'food_database', None)

    report = {
        "food_recognition": {
            "model_available": food_recognition_model is not None,
            "accuracy": 0.85,  # Placeholder
        },
        "sodium_prediction": {
            "model_available": sodium_prediction_model is not None,
            "accuracy": 0.78,  # Placeholder
        },
        "user_behavior": {
            "model_available": user_behavior_model is not None,
            "accuracy": 0.82,  # Placeholder
        },
        "database": {
            "food_database_available": food_database is not None,
            "food_items": len(food_database) if food_database is not None else 0,
        },
    }

    with open('reports/comprehensive_report.json', 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=4)

    return report

# Attach the free functions defined in the preceding cells to the class so
# they can be called as ordinary methods on FoodRecognitionModel instances.
# Each function is registered under its own name.
for _method in (
    analyze_user_feedback,
    generate_comprehensive_report,
    log_interaction,
    get_user_preferences,
    process_food_image,
    predict_user_retention,
    train_user_behavior_model,
):
    setattr(FoodRecognitionModel, _method.__name__, _method)

In [6]:
def load_models(self, food_database_path=None):
    """Load whichever pre-trained artifacts are present on disk.

    Looks for ``models/food_recognition_model.h5``,
    ``models/sodium_prediction_model.pkl`` and
    ``models/user_behavior_model.pkl`` relative to the working directory,
    plus a food database CSV. Loading is best-effort: each artifact is
    handled independently, so a missing or unreadable file is reported on
    stdout and never prevents the remaining artifacts from loading.

    After this call the attributes ``food_recognition_model``,
    ``sodium_prediction_model``, ``user_behavior_model`` and
    ``food_database`` always exist; any that could not be loaded (and were
    not already set) are ``None``.

    Args:
        food_database_path: Path of the food database CSV file. When
            omitted, the original hard-coded location
            ``C:\\Users\\patel\\Downloads\\UECFOOD256`` is used.
    """
    # Make sure later code can test "is None" without hitting
    # AttributeError when an artifact was never loaded. Values that are
    # already set are left untouched.
    for attribute in (
        'food_recognition_model',
        'sodium_prediction_model',
        'user_behavior_model',
        'food_database',
    ):
        if not hasattr(self, attribute):
            setattr(self, attribute, None)

    if food_database_path is None:
        food_database_path = r'C:\Users\patel\Downloads\UECFOOD256'

    # The loaders are wrapped in lambdas so that load_model / joblib are
    # only looked up when the corresponding file actually exists.
    # NOTE(review): joblib.load unpickles its input and can execute
    # arbitrary code - only point these paths at trusted files.
    artifacts = (
        (
            'food_recognition_model',
            'models/food_recognition_model.h5',
            lambda path: load_model(path),
            "Food recognition model loaded",
        ),
        (
            'sodium_prediction_model',
            'models/sodium_prediction_model.pkl',
            lambda path: joblib.load(path),
            "Sodium prediction model loaded",
        ),
        (
            'user_behavior_model',
            'models/user_behavior_model.pkl',
            lambda path: joblib.load(path),
            "User behavior model loaded",
        ),
    )
    for attribute, path, loader, loaded_message in artifacts:
        if not os.path.exists(path):
            continue
        try:
            setattr(self, attribute, loader(path))
        except Exception as e:  # best-effort: report and move on
            print(f"Error loading models: {e}")
        else:
            print(loaded_message)

    # isfile rather than exists: read_csv cannot open a directory, so a
    # directory at this path is reported as "not found" instead of being
    # passed to the parser.
    if os.path.isfile(food_database_path):
        try:
            self.food_database = pd.read_csv(food_database_path)
        except Exception as e:  # best-effort: report and move on
            print(f"Error loading models: {e}")
        else:
            print("Food database loaded")
    else:
        print("Food database not found at the specified path.")

# Register the loader defined in this cell as a method of the class.
FoodRecognitionModel.load_models = load_models

In [8]:
if __name__ == "__main__":
    # Demo run: build the pipeline object, load any saved artifacts, then
    # push one sample image through process_food_image and print the result.
    spoon_ds = FoodRecognitionModel()
    
    # Load pre-trained models
    spoon_ds.load_models()
    
    # Example: Process a food image
    # NOTE: '15847.jpg' is resolved relative to the working directory. When
    # the food cannot be identified, process_food_image returns
    # {"error": "Could not identify food"} instead of a recommendation.
    result = spoon_ds.process_food_image('15847.jpg', user_id=123)
    print(result)
    
    # The remaining examples are disabled for this run.
    # NOTE(review): analyze_survey_data is not among the methods attached to
    # FoodRecognitionModel in the visible cells - confirm it exists before
    # re-enabling.
#     # Example: Analyze survey data
#     survey_report = spoon_ds.analyze_survey_data('data/survey_results.csv')
#     print("Survey analysis complete")
    
#     # Example: Analyze user feedback
#     feedback_report = spoon_ds.analyze_user_feedback('data/user_feedback.csv')
#     print("Feedback analysis complete")
    
#     # Generate comprehensive report
#     final_report = spoon_ds.generate_comprehensive_report()
#     print("Comprehensive report generated")

Food recognition model not loaded
{'error': 'Could not identify food'}
