<a href="https://colab.research.google.com/github/Lalchand-b/another_repo_example/blob/main/nutrition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the later cells read their CSV files
# from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from glob import glob

# ✅ Step 1: Define folder where your CSVs are
folder_path = "/content/drive/MyDrive/FINAL FOOD DATASET"  # 🔁 Change if needed
csv_files = glob(folder_path + "*.csv")

# ✅ Step 2: Initialize the list for valid dataframes
all_dfs = []

# ✅ Step 3: Process each CSV file
for file in csv_files:
    try:
        # Read the file
        df = pd.read_csv(file)

        # Inspect the columns to see if they match what you expect
        print(f"Checking columns for file: {file}")
        print(f"Columns in {file}: {df.columns.tolist()}")

        # Remove any leading/trailing spaces from column names and convert to lowercase
        df.columns = df.columns.str.strip().str.replace(' ', '_').str.lower()

        # Check cleaned column names
        print(f"Cleaned columns in {file}: {df.columns.tolist()}")

        # Columns we want to keep
        required_cols = ['food_item', 'caloric_value', 'fat', 'carbohydrates', 'protein']

        # Check if the required columns are present in the current dataframe
        if all(col in df.columns for col in required_cols):
            # Rename columns to the required standard
            df.rename(columns={
                'food_item': 'Food_Item',
                'caloric_value': 'Calories',
                'fat': 'Fats',
                'carbohydrates': 'Carbohydrates',
                'protein': 'Proteins'
            }, inplace=True)

            # Append the valid dataframe to the list
            all_dfs.append(df[['Food_Item', 'Calories', 'Proteins', 'Fats', 'Carbohydrates']])
            print(f"✅ Data from {file} added.")
        else:
            missing_cols = [col for col in required_cols if col not in df.columns]
            print(f"⚠️ Missing columns in {file}: {', '.join(missing_cols)}")

    except Exception as e:
        print(f"❌ Error reading {file}: {e}")

# ✅ Step 4: Combine the dataframes if we have valid ones
if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    combined_df.drop_duplicates(subset="Food_Item", inplace=True)
    combined_df.dropna(subset=["Food_Item"], inplace=True)
    combined_df["Food_Item"] = combined_df["Food_Item"].str.lower()

    print(f"✅ Combined dataset shape: {combined_df.shape}")
else:
    print("⚠️ No valid dataframes to combine.")


⚠️ No valid dataframes to combine.


In [None]:
import pandas as pd

# ✅ Step 1: Read each file using provided paths
food_data_dir = "/content/drive/MyDrive/FINAL FOOD DATASET"
group_files = [
    "FOOD-DATA-GROUP1.csv",
    "FOOD-DATA-GROUP2.csv",
    "FOOD-DATA-GROUP3 (FRUITS).csv",
    "FOOD-DATA-GROUP4.csv",
    "FOOD-DATA-GROUP5.csv",
]
df1, df2, df3, df4, df5 = [pd.read_csv(f"{food_data_dir}/{name}") for name in group_files]

# ✅ Step 2: Clean column names for consistency
dataframes = [df1, df2, df3, df4, df5]
cleaned_dfs = []
required_cols = ['Food_Item', 'Calories', 'Proteins', 'Fats', 'Carbohydrates']

for df in dataframes:
    # The frames are modified in place on purpose, so df1..df5 end up with the
    # cleaned and renamed columns as well.
    df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')  # Clean columns
    df.rename(columns={
        'food': 'Food_Item',
        'caloric_value': 'Calories',
        'fat': 'Fats',
        'carbohydrates': 'Carbohydrates',
        'protein': 'Proteins'
    }, inplace=True)
    # Raises KeyError naming the absent columns if a file lacks any of them.
    cleaned_dfs.append(df[required_cols])

# ✅ Step 3: Combine all cleaned DataFrames
combined_df = (
    pd.concat(cleaned_dfs, ignore_index=True)
    .dropna(subset=["Food_Item"])
    # Normalize names BEFORE de-duplicating; otherwise entries that differ only
    # by case or surrounding spaces survive as separate rows.
    .assign(Food_Item=lambda frame: frame["Food_Item"].str.strip().str.lower())
    .drop_duplicates(subset="Food_Item")
    # Keep index labels equal to row positions after rows were dropped.
    .reset_index(drop=True)
)

# ✅ Step 4: Preview the result
print("✅ Combined dataset shape:", combined_df.shape)
combined_df.head()


✅ Combined dataset shape: (2395, 5)


Unnamed: 0,Food_Item,Calories,Proteins,Fats,Carbohydrates
0,cream cheese,51,0.9,5.0,0.8
1,neufchatel cheese,215,7.8,19.4,3.1
2,requeijao cremoso light catupiry,49,0.8,3.6,0.9
3,ricotta cheese,30,1.5,2.0,1.5
4,cream cheese low fat,30,1.2,2.3,1.2


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ✅ Step 1: Learn a TF-IDF vocabulary from the food names.
# Row i of tfidf_matrix is the vector for the i-th row (by position) of combined_df.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(combined_df.loc[:, "Food_Item"])

# ✅ Step 2: Function to find best match for user input
def get_nutrition_info(user_input, min_similarity=0.0):
    """Print the nutrition facts of the food whose name best matches ``user_input``.

    The query is vectorized with the module-level ``vectorizer`` and compared
    by cosine similarity against ``tfidf_matrix``, which was fitted on
    ``combined_df["Food_Item"]``; the best-scoring row of ``combined_df`` is
    printed.

    Args:
        user_input: Free-text food name typed by the user.
        min_similarity: The best score must be strictly greater than this
            value to count as a match. With the default of 0.0 a query that
            shares no word with any food name is reported as "no match"
            instead of silently returning the first row of the table.

    Returns:
        None. The result is printed.
    """
    query = user_input.strip().lower()
    if not query:
        print("⚠️ Please enter a food name.")
        return

    user_vec = vectorizer.transform([query])

    # Calculate cosine similarity; the result has shape (1, number_of_foods)
    similarity = cosine_similarity(user_vec, tfidf_matrix)

    # Get index of best matching food item
    idx = similarity.argmax()

    # When nothing matches, every score is 0 and argmax() would return row 0.
    if similarity[0, idx] <= min_similarity:
        print(f"\n⚠️ No matching food found for '{user_input}'.")
        return

    # Positional lookup: matrix row idx corresponds to the idx-th row of combined_df
    matched_food = combined_df.iloc[idx]

    # Show result
    print(f"\n🍽️ Closest Match: {matched_food['Food_Item'].title()}")
    print(f"🔥 Calories: {matched_food['Calories']}")
    print(f"💪 Proteins: {matched_food['Proteins']} g")
    print(f"🧈 Fats: {matched_food['Fats']} g")
    print(f"🍞 Carbohydrates: {matched_food['Carbohydrates']} g")

# ✅ Example usage:
get_nutrition_info("sandwich")



🍽️ Closest Match: Chicken Sandwich
🔥 Calories: 468
💪 Proteins: 30.4 g
🧈 Fats: 20.9 g
🍞 Carbohydrates: 39.1 g
