# **1. INSTALL AND IMPORT ALL LIBRARIES**

In [1]:
# !pip install -q tensorflow-recommenders
# !pip install -q --upgrade tensorflow-datasets

In [2]:
import os
import tempfile

import numpy as np
import pandas as pd
import tensorflow as tf
from typing import Dict, Text

from itertools import combinations

import gdown
import zipfile

## **a. Download the dataset from google drive link**

In [3]:
# URL download
url_dataset = 'https://drive.usercontent.google.com/download?id=1rGmcw6za9Xzrll5h22wtra1SjrEj1q4m&export=download&authuser=3&confirm=t&uuid=d42466e3-f504-40fe-b22b-01f68a8c1288&at=APZUnTU4id3vBiE1wbiZnHvcEE7H:1700142409715'

# Replace with the desired names for the downloaded zip files
name_dataset = 'Dataset.zip'

# Download the zip files
gdown.download(url_dataset, name_dataset, quiet=False)

# Unzip the downloaded files
with zipfile.ZipFile(name_dataset, 'r') as zip_ref:
    zip_ref.extractall('Dataset')  # Extracts into 'Dataset' folder

print("Files have been downloaded and extracted.")

Downloading...
From: https://drive.usercontent.google.com/download?id=1rGmcw6za9Xzrll5h22wtra1SjrEj1q4m&export=download&authuser=3&confirm=t&uuid=d42466e3-f504-40fe-b22b-01f68a8c1288&at=APZUnTU4id3vBiE1wbiZnHvcEE7H:1700142409715
To: /content/Dataset.zip
100%|██████████| 1.66M/1.66M [00:00<00:00, 146MB/s]

Files have been downloaded and extracted.





# **2. PREPROCESSING DATA**

## **a. Food Data**

In [4]:
food_data = pd.read_csv("Dataset/FoodData/food_raw.csv")

In [5]:
food_data.head()

Unnamed: 0,No.,Food_ID,Nama Bahan Makanan,Fast_Food,Sumber,Tipe,Jenis_Olahan,Air (g),Energi (Kal),Protein (g),...,Daging Babi,Daging Kambing,Daging Sapi,Ikan,Kedelai,Sayur,Susu,Telur Ayam,Tepung,Umbi-umbian
0,1,FNT001,Bakso,Ya,Daging Sapi,Makanan Berat,Rebus,70.0,202,12.41,...,0,0,1,0,0,0,0,0,0,0
1,2,FNT002,Bubur Ayam,Ya,"Beras, Daging Ayam",Makanan Berat,Rebus,80.0,155,11.48,...,0,0,0,0,0,0,0,0,0,0
2,3,FNT003,Mi Goreng,Ya,Tepung,Makanan Berat,Rebus,0.0,475,10.0,...,0,0,0,0,0,0,0,0,1,0
3,4,FNT004,Sate,Tidak,"Daging Ayam, Daging Kambing",Makanan Berat,Bakar,0.0,225,19.54,...,0,0,0,0,0,0,0,0,0,0
4,5,FNT005,Soto,Tidak,"Daging Ayam, Daging Sapi",Makanan Berat,Rebus,70.0,130,9.96,...,0,0,0,0,0,0,0,0,0,0


In [6]:
food_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151 entries, 0 to 150
Data columns (total 85 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   No.                        151 non-null    int64  
 1   Food_ID                    151 non-null    object 
 2   Nama Bahan Makanan         151 non-null    object 
 3   Fast_Food                  151 non-null    object 
 4   Sumber                     151 non-null    object 
 5   Tipe                       151 non-null    object 
 6   Jenis_Olahan               151 non-null    object 
 7   Air (g)                    151 non-null    float64
 8   Energi (Kal)               151 non-null    int64  
 9   Protein (g)                151 non-null    float64
 10  Lemak (g)                  151 non-null    float64
 11  Karbohidrat (g)            151 non-null    float64
 12  Serat (g)                  151 non-null    float64
 13  Abu (g)                    151 non-null    float64

In [7]:
food_data.columns

Index(['No.', 'Food_ID', 'Nama Bahan Makanan', 'Fast_Food', 'Sumber', 'Tipe',
       'Jenis_Olahan', 'Air (g)', 'Energi (Kal)', 'Protein (g)', 'Lemak (g)',
       'Karbohidrat (g)', 'Serat (g)', 'Abu (g)', 'Kalsium (Ca) (mg)',
       'Fosfor (P) (mg)', 'Besi (Fe) (mg)', 'Natrium (Na) (mg)',
       'Kalium (Ka) (mg)', 'Tembaga (Cu) (mg)', 'Seng (Zn) (mg)',
       'Retinol (vit. A) (mcg)', 'β-karoten (mcg)', 'Karoten total (mcg)',
       'Thiamin (vit. B1) (mg)', 'Riboflavin (vit. B2) (mg)', 'Niasin (mg)',
       'Vitamin C (mg)', 'BDD (%)', 'Mentah / Olahan', 'Kelompok Makanan',
       'Sumber TKPI 2019', 'Fast_Food_New', 'Tipe_New', 'Jenis_Olahan_New',
       'Mentah / Olahan_New', 'Kelompok Makanan_New', 'Fast_Food_New_0',
       'Fast_Food_New_1', 'Tipe_New_0', 'Tipe_New_1', 'Jenis_Olahan_New_0',
       'Jenis_Olahan_New_1', 'Jenis_Olahan_New_2', 'Jenis_Olahan_New_3',
       'Jenis_Olahan_New_4', 'Jenis_Olahan_New_5', 'Jenis_Olahan_New_6',
       'Jenis_Olahan_New_7', 'Jenis_Olahan_N

In [8]:
for column in ['Fast_Food','Sumber','Tipe','Jenis_Olahan','Mentah / Olahan','Kelompok Makanan']:
    print(food_data[column].unique())

['Ya' 'Tidak']
['Daging Sapi' 'Beras, Daging Ayam' 'Tepung' 'Daging Ayam, Daging Kambing'
 'Daging Ayam, Daging Sapi' 'Daging Kambing' 'Daging Ayam'
 'Daging Ayam, Daging Kerbau' 'Daging Sapi, Daging Kerbau, Kedelai'
 'Kedelai' 'Biji-bijian' 'Buah' 'Beras' 'Beras, Ikan' 'Sayur'
 'Daging Babi' 'Daging Sapi, Daging Ayam' 'Ikan' 'Ikan, Telur'
 'Telur Ayam' 'Buah, Sayur' 'Susu'
 'Tepung, Telur, Daging Ayam, Daging Sapi' 'Tepung, Telur, Daging Ayam'
 'Umbi-umbian']
['Makanan Berat' 'Makanan Ringan']
['Rebus' 'Bakar' 'Mentah' 'Goreng' 'Panggang' 'Kukus' 'Gulai' 'Asap'
 'Presto' 'Tumis' 'Beku']
['Olahan' 'Tunggal']
['Daging' 'Serealia' 'Kacang' 'Buah' 'Sayur' 'Ikan dsb' 'Telur' 'Susu'
 'Umbi']


In [9]:
for index, row in food_data.iterrows():
    if row[' Daging Ayam'] == 1 and row['Daging Ayam'] == 0:
        food_data.at[index, 'Daging Ayam'] = 1
    if row[' Daging Kambing'] == 1 and row['Daging Kambing'] == 0:
        food_data.at[index, 'Daging Kambing'] = 1
    # if row[' Daging Kerbau'] == 1 and row['Daging Kerbau'] == 0:
    #     food_data.at[index, 'Daging Kerbau'] = 1
    if row[' Daging Sapi'] == 1 and row['Daging Sapi'] == 0:
        food_data.at[index, 'Daging Sapi'] = 1
    if row[' Ikan'] == 1 and row['Ikan'] == 0:
        food_data.at[index, 'Ikan'] = 1
    if row[' Kedelai'] == 1 and row['Kedelai'] == 0:
        food_data.at[index, 'Kedelai'] = 1
    if row[' Sayur'] == 1 and row['Sayur'] == 0:
        food_data.at[index, 'Sayur'] = 1
    if row[' Telur'] == 1 and row['Telur Ayam'] == 0:
        food_data.at[index, 'Telur Ayam'] = 1

In [10]:
# Cek data
filtered_data = food_data[food_data[' Telur'] == 1]
print(filtered_data['Telur Ayam'])

74     1
147    1
148    1
149    1
Name: Telur Ayam, dtype: int64


```
Jenis olahan :
  0 = Asap
  1 = Bakar
  2 = Beku
  3 = Goreng
  4 = Gulai
  5 = Kukus
  6 = Mentah
  7 = Panggang
  8 = Presto
  9 = Rebus
  10 = Tumis
Tipe:
  0 = Makanan Berat
  1 = Makanan Ringan
```

In [11]:
food_data = food_data.drop(columns=[' Daging Ayam', ' Daging Kambing', ' Daging Sapi', ' Ikan', ' Kedelai', ' Sayur', ' Telur'])

In [12]:
food_data.columns

Index(['No.', 'Food_ID', 'Nama Bahan Makanan', 'Fast_Food', 'Sumber', 'Tipe',
       'Jenis_Olahan', 'Air (g)', 'Energi (Kal)', 'Protein (g)', 'Lemak (g)',
       'Karbohidrat (g)', 'Serat (g)', 'Abu (g)', 'Kalsium (Ca) (mg)',
       'Fosfor (P) (mg)', 'Besi (Fe) (mg)', 'Natrium (Na) (mg)',
       'Kalium (Ka) (mg)', 'Tembaga (Cu) (mg)', 'Seng (Zn) (mg)',
       'Retinol (vit. A) (mcg)', 'β-karoten (mcg)', 'Karoten total (mcg)',
       'Thiamin (vit. B1) (mg)', 'Riboflavin (vit. B2) (mg)', 'Niasin (mg)',
       'Vitamin C (mg)', 'BDD (%)', 'Mentah / Olahan', 'Kelompok Makanan',
       'Sumber TKPI 2019', 'Fast_Food_New', 'Tipe_New', 'Jenis_Olahan_New',
       'Mentah / Olahan_New', 'Kelompok Makanan_New', 'Fast_Food_New_0',
       'Fast_Food_New_1', 'Tipe_New_0', 'Tipe_New_1', 'Jenis_Olahan_New_0',
       'Jenis_Olahan_New_1', 'Jenis_Olahan_New_2', 'Jenis_Olahan_New_3',
       'Jenis_Olahan_New_4', 'Jenis_Olahan_New_5', 'Jenis_Olahan_New_6',
       'Jenis_Olahan_New_7', 'Jenis_Olahan_N

In [13]:
food_data = food_data.rename(columns={' Daging Kerbau' : 'Daging Kerbau'})

In [14]:
food_data.columns

Index(['No.', 'Food_ID', 'Nama Bahan Makanan', 'Fast_Food', 'Sumber', 'Tipe',
       'Jenis_Olahan', 'Air (g)', 'Energi (Kal)', 'Protein (g)', 'Lemak (g)',
       'Karbohidrat (g)', 'Serat (g)', 'Abu (g)', 'Kalsium (Ca) (mg)',
       'Fosfor (P) (mg)', 'Besi (Fe) (mg)', 'Natrium (Na) (mg)',
       'Kalium (Ka) (mg)', 'Tembaga (Cu) (mg)', 'Seng (Zn) (mg)',
       'Retinol (vit. A) (mcg)', 'β-karoten (mcg)', 'Karoten total (mcg)',
       'Thiamin (vit. B1) (mg)', 'Riboflavin (vit. B2) (mg)', 'Niasin (mg)',
       'Vitamin C (mg)', 'BDD (%)', 'Mentah / Olahan', 'Kelompok Makanan',
       'Sumber TKPI 2019', 'Fast_Food_New', 'Tipe_New', 'Jenis_Olahan_New',
       'Mentah / Olahan_New', 'Kelompok Makanan_New', 'Fast_Food_New_0',
       'Fast_Food_New_1', 'Tipe_New_0', 'Tipe_New_1', 'Jenis_Olahan_New_0',
       'Jenis_Olahan_New_1', 'Jenis_Olahan_New_2', 'Jenis_Olahan_New_3',
       'Jenis_Olahan_New_4', 'Jenis_Olahan_New_5', 'Jenis_Olahan_New_6',
       'Jenis_Olahan_New_7', 'Jenis_Olahan_N

In [15]:
food_data = food_data.rename(columns={'Food_ID' : 'food_id'})
food_data = food_data.rename(columns={'Nama Bahan Makanan' : 'nama_makanan'})
food_data = food_data.rename(columns={'Sumber' : 'sumber'})
food_data = food_data.rename(columns={'Tipe' : 'tipe'})
food_data = food_data.rename(columns={'Fast_Food' : 'fast_food'})
food_data = food_data.rename(columns={'Jenis_Olahan' : 'jenis_olahan'})
food_data = food_data.rename(columns={'Energi (Kal)' : 'kalori'})
food_data = food_data.rename(columns={'Protein (g)' : 'protein'})
food_data = food_data.rename(columns={'Lemak (g)' : 'lemak'})
food_data = food_data.rename(columns={'Karbohidrat (g)' : 'karbohidrat'})
food_data = food_data.rename(columns={'Daging Kerbau' : 'daging_kerbau'})
food_data = food_data.rename(columns={'Beras' : 'beras'})
food_data = food_data.rename(columns={'Biji-bijian' : 'biji_bijian'})
food_data = food_data.rename(columns={'Buah' : 'buah'})
food_data = food_data.rename(columns={'Daging Ayam' : 'daging_ayam'})
food_data = food_data.rename(columns={'Daging Babi' : 'daging_babi'})
food_data = food_data.rename(columns={'Daging Kambing' : 'daging_kambing'})
food_data = food_data.rename(columns={'Daging Sapi' : 'daging_sapi'})
food_data = food_data.rename(columns={'Ikan' : 'ikan'})
food_data = food_data.rename(columns={'Kedelai' : 'kedelai'})
food_data = food_data.rename(columns={'Sayur' : 'sayur'})
food_data = food_data.rename(columns={'Susu' : 'susu'})
food_data = food_data.rename(columns={'Telur Ayam' : 'telur_ayam'})
food_data = food_data.rename(columns={'Tepung' : 'tepung'})
food_data = food_data.rename(columns={'Umbi-umbian' : 'umbi_umbian'})

In [16]:
food_data.columns

Index(['No.', 'food_id', 'nama_makanan', 'fast_food', 'sumber', 'tipe',
       'jenis_olahan', 'Air (g)', 'kalori', 'protein', 'lemak', 'karbohidrat',
       'Serat (g)', 'Abu (g)', 'Kalsium (Ca) (mg)', 'Fosfor (P) (mg)',
       'Besi (Fe) (mg)', 'Natrium (Na) (mg)', 'Kalium (Ka) (mg)',
       'Tembaga (Cu) (mg)', 'Seng (Zn) (mg)', 'Retinol (vit. A) (mcg)',
       'β-karoten (mcg)', 'Karoten total (mcg)', 'Thiamin (vit. B1) (mg)',
       'Riboflavin (vit. B2) (mg)', 'Niasin (mg)', 'Vitamin C (mg)', 'BDD (%)',
       'Mentah / Olahan', 'Kelompok Makanan', 'Sumber TKPI 2019',
       'Fast_Food_New', 'Tipe_New', 'Jenis_Olahan_New', 'Mentah / Olahan_New',
       'Kelompok Makanan_New', 'Fast_Food_New_0', 'Fast_Food_New_1',
       'Tipe_New_0', 'Tipe_New_1', 'Jenis_Olahan_New_0', 'Jenis_Olahan_New_1',
       'Jenis_Olahan_New_2', 'Jenis_Olahan_New_3', 'Jenis_Olahan_New_4',
       'Jenis_Olahan_New_5', 'Jenis_Olahan_New_6', 'Jenis_Olahan_New_7',
       'Jenis_Olahan_New_8', 'Jenis_Olahan_New

In [17]:
food_data['nama_makanan'] = food_data['nama_makanan'].astype(str)

In [18]:
column_dataset = ['food_id', 'nama_makanan', 'sumber', 'tipe', 'jenis_olahan', 'fast_food', 'kalori', 'protein', 'lemak', 'karbohidrat',
                  'daging_kerbau', 'beras', 'biji_bijian', 'buah', 'daging_ayam', 'daging_babi', 'daging_kambing', 'daging_sapi',
                  'ikan', 'kedelai', 'sayur', 'susu', 'telur_ayam', 'tepung', 'umbi_umbian']

In [19]:
food_data_fix = food_data[column_dataset]

In [20]:
food_data_fix.to_csv('food_data_final.csv', index=False)

In [21]:
food_data_fix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 151 entries, 0 to 150
Data columns (total 25 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   food_id         151 non-null    object 
 1   nama_makanan    151 non-null    object 
 2   sumber          151 non-null    object 
 3   tipe            151 non-null    object 
 4   jenis_olahan    151 non-null    object 
 5   fast_food       151 non-null    object 
 6   kalori          151 non-null    int64  
 7   protein         151 non-null    float64
 8   lemak           151 non-null    float64
 9   karbohidrat     151 non-null    float64
 10  daging_kerbau   151 non-null    int64  
 11  beras           151 non-null    int64  
 12  biji_bijian     151 non-null    int64  
 13  buah            151 non-null    int64  
 14  daging_ayam     151 non-null    int64  
 15  daging_babi     151 non-null    int64  
 16  daging_kambing  151 non-null    int64  
 17  daging_sapi     151 non-null    int

## **b. Food Popularity Data**

In [22]:
user_data = pd.read_csv("Dataset/FoodData/food_popularity.csv")

In [23]:
user_data

Unnamed: 0,Food_ID,User_ID,value,sex_new_0,sex_new_1,blood_group_new_0,blood_group_new_1,blood_group_new_2,blood_group_new_3,blood_group_new_4,...,Daging Babi,Daging Kambing,Daging Sapi,Ikan,Kedelai,Sayur,Susu,Telur Ayam,Tepung,Umbi-umbian
0,FNT001,UNT001,2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,1,0,0,0,0,0,0,0
1,FNT002,UNT001,2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,FNT003,UNT001,4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
3,FNT004,UNT001,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
4,FNT005,UNT001,2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22796,FNT147,UNT105,2,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
22797,FNT148,UNT105,5,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
22798,FNT149,UNT105,3,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0
22799,FNT150,UNT105,1,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0,0,0,0,0,0,0,0,1,0


In [24]:
user_data.columns

Index(['Food_ID', 'User_ID', 'value', 'sex_new_0', 'sex_new_1',
       'blood_group_new_0', 'blood_group_new_1', 'blood_group_new_2',
       'blood_group_new_3', 'blood_group_new_4', 'blood_group_new_5',
       'blood_group_new_6', 'blood_group_new_7', 'blood_group', 'username',
       'name', 'sex', 'address', 'mail', 'Age', 'Body_Weight', 'Body_Height',
       'Cal_Need', 'Nama Bahan Makanan', 'Fast_Food', 'Sumber', 'Tipe',
       'Jenis_Olahan', 'Air (g)', 'Energi (Kal)', 'Protein (g)', 'Lemak (g)',
       'Karbohidrat (g)', 'Serat (g)', 'Abu (g)', 'Kalsium (Ca) (mg)',
       'Fosfor (P) (mg)', 'Besi (Fe) (mg)', 'Natrium (Na) (mg)',
       'Kalium (Ka) (mg)', 'Tembaga (Cu) (mg)', 'Seng (Zn) (mg)',
       'Retinol (vit. A) (mcg)', 'β-karoten (mcg)', 'Karoten total (mcg)',
       'Thiamin (vit. B1) (mg)', 'Riboflavin (vit. B2) (mg)', 'Niasin (mg)',
       'Vitamin C (mg)', 'BDD (%)', 'Mentah / Olahan', 'Kelompok Makanan',
       'Fast_Food_New_0', 'Fast_Food_New_1', 'Tipe_New_0', 'Tip

In [25]:
user_data = user_data.rename(columns={'User_ID' : 'user_id'})
user_data = user_data.rename(columns={'Food_ID' : 'food_id'})
user_data = user_data.rename(columns={'Nama Bahan Makanan' : 'nama_makanan'})
user_data = user_data.rename(columns={'sex_new_1' : 'gender'})
user_data = user_data.rename(columns={'Age' : 'age'})
user_data = user_data.rename(columns={'Body_Weight' : 'body_weight'})
user_data = user_data.rename(columns={'Body_Height' : 'body_height'})
user_data = user_data.rename(columns={'Cal_Need' : 'cal_need'})

In [26]:
user_data.columns

Index(['food_id', 'user_id', 'value', 'sex_new_0', 'gender',
       'blood_group_new_0', 'blood_group_new_1', 'blood_group_new_2',
       'blood_group_new_3', 'blood_group_new_4', 'blood_group_new_5',
       'blood_group_new_6', 'blood_group_new_7', 'blood_group', 'username',
       'name', 'sex', 'address', 'mail', 'age', 'body_weight', 'body_height',
       'cal_need', 'nama_makanan', 'Fast_Food', 'Sumber', 'Tipe',
       'Jenis_Olahan', 'Air (g)', 'Energi (Kal)', 'Protein (g)', 'Lemak (g)',
       'Karbohidrat (g)', 'Serat (g)', 'Abu (g)', 'Kalsium (Ca) (mg)',
       'Fosfor (P) (mg)', 'Besi (Fe) (mg)', 'Natrium (Na) (mg)',
       'Kalium (Ka) (mg)', 'Tembaga (Cu) (mg)', 'Seng (Zn) (mg)',
       'Retinol (vit. A) (mcg)', 'β-karoten (mcg)', 'Karoten total (mcg)',
       'Thiamin (vit. B1) (mg)', 'Riboflavin (vit. B2) (mg)', 'Niasin (mg)',
       'Vitamin C (mg)', 'BDD (%)', 'Mentah / Olahan', 'Kelompok Makanan',
       'Fast_Food_New_0', 'Fast_Food_New_1', 'Tipe_New_0', 'Tipe_New_1',

In [27]:
user_data['user_id'] = user_data['user_id'].astype(str)
user_data['food_id'] = user_data['food_id'].astype(str)
user_data['nama_makanan'] = user_data['nama_makanan'].astype(str)
user_data['gender'] = user_data['gender'].astype(np.int32)
user_data['age'] = user_data['age'].astype(np.int32)
user_data['body_weight'] = user_data['body_weight'].astype(np.int32)
user_data['body_height'] = user_data['body_height'].astype(np.int32)
user_data['cal_need'] = user_data['cal_need'].astype(np.int32)

In [28]:
column_user_data = ['user_id', 'gender', 'age', 'body_weight', 'body_height', 'cal_need', 'food_id', 'value']
user_data_fix = user_data[column_user_data]

In [29]:
user_data_fix

Unnamed: 0,user_id,gender,age,body_weight,body_height,cal_need,food_id,value
0,UNT001,1,22,54,160,1869,FNT001,2
1,UNT001,1,22,54,160,1869,FNT002,2
2,UNT001,1,22,54,160,1869,FNT003,4
3,UNT001,1,22,54,160,1869,FNT004,1
4,UNT001,1,22,54,160,1869,FNT005,2
...,...,...,...,...,...,...,...,...
22796,UNT105,0,29,61,157,1962,FNT147,2
22797,UNT105,0,29,61,157,1962,FNT148,5
22798,UNT105,0,29,61,157,1962,FNT149,3
22799,UNT105,0,29,61,157,1962,FNT150,1


In [30]:
user_id_old = user_data_fix['user_id'].values
user_id_old

array(['UNT001', 'UNT001', 'UNT001', ..., 'UNT105', 'UNT105', 'UNT105'],
      dtype=object)

In [31]:
food_id_old = user_data_fix['food_id'].values
print(food_id_old)

['FNT001' 'FNT002' 'FNT003' ... 'FNT149' 'FNT150' 'FNT151']


In [32]:
def convert_user_id_to_numeric(old_data):
    return int(old_data.replace('UNT', ''))

def convert_food_id_to_numeric(old_data):
    return int(old_data.replace('FNT', ''))

In [33]:
user_data_fix['user_id'] = [convert_user_id_to_numeric(uid) for uid in user_id_old]
user_data_fix['food_id'] = [convert_food_id_to_numeric(uid) for uid in food_id_old]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_data_fix['user_id'] = [convert_user_id_to_numeric(uid) for uid in user_id_old]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_data_fix['food_id'] = [convert_food_id_to_numeric(uid) for uid in food_id_old]


In [34]:
np.unique(user_data_fix['user_id'])
np.unique(food_data_fix['food_id'])

array(['FNT001', 'FNT002', 'FNT003', 'FNT004', 'FNT005', 'FNT006',
       'FNT007', 'FNT008', 'FNT009', 'FNT010', 'FNT011', 'FNT012',
       'FNT013', 'FNT014', 'FNT015', 'FNT016', 'FNT017', 'FNT018',
       'FNT019', 'FNT020', 'FNT021', 'FNT022', 'FNT023', 'FNT024',
       'FNT025', 'FNT026', 'FNT027', 'FNT028', 'FNT029', 'FNT030',
       'FNT031', 'FNT032', 'FNT033', 'FNT034', 'FNT035', 'FNT036',
       'FNT037', 'FNT038', 'FNT039', 'FNT040', 'FNT041', 'FNT042',
       'FNT043', 'FNT044', 'FNT045', 'FNT046', 'FNT047', 'FNT048',
       'FNT049', 'FNT050', 'FNT051', 'FNT052', 'FNT053', 'FNT054',
       'FNT055', 'FNT056', 'FNT057', 'FNT058', 'FNT059', 'FNT060',
       'FNT061', 'FNT062', 'FNT063', 'FNT064', 'FNT065', 'FNT066',
       'FNT067', 'FNT068', 'FNT069', 'FNT070', 'FNT071', 'FNT072',
       'FNT073', 'FNT074', 'FNT075', 'FNT076', 'FNT077', 'FNT078',
       'FNT079', 'FNT080', 'FNT081', 'FNT082', 'FNT083', 'FNT084',
       'FNT085', 'FNT086', 'FNT087', 'FNT088', 'FNT089', 'FNT0

In [35]:
user_data_fix.to_csv('user_data_final.csv', index=False)

In [36]:
user_data_fix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22801 entries, 0 to 22800
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   user_id      22801 non-null  int64
 1   gender       22801 non-null  int32
 2   age          22801 non-null  int32
 3   body_weight  22801 non-null  int32
 4   body_height  22801 non-null  int32
 5   cal_need     22801 non-null  int32
 6   food_id      22801 non-null  int64
 7   value        22801 non-null  int64
dtypes: int32(5), int64(3)
memory usage: 979.9 KB
