In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import json
import threading
from concurrent.futures import ThreadPoolExecutor

In [3]:
data = pd.read_csv('/content/drive/MyDrive/Final Project/mfp-diaries.tsv', sep='\t', header=None)
data.head()

Unnamed: 0,0,1,2,3
0,1,2014-09-14,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2924}..."
1,1,2014-09-15,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2430}..."
2,1,2014-09-16,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 1862}..."
3,1,2014-09-17,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2251}..."
4,1,2014-09-18,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2001}..."


In [41]:
def convert_to_json(row):
    id = row[0]
    date = row[1]
    details = json.loads(row[2])
    summary = json.loads(row[3])
    return pd.DataFrame([[id, date, details, summary]], columns=['Id', 'Date', 'Details', 'Summary'])

In [42]:
convert_to_json(data.iloc[0])

Unnamed: 0,Id,Date,Details,Summary
0,1,2014-09-14,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2924}..."


In [43]:
def create_threads_for_rows(data):
    threads = []
    results = []

    def thread_function(index, row):
        processed_row = convert_to_json(row)
        results.append(processed_row)

    num_rows = data.shape[0]
    for i in range(num_rows):
        row = data.iloc[i, :]
        thread = threading.Thread(target=thread_function, args=(i, row))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    processed_data = pd.concat(results, ignore_index=True)
    return processed_data

In [62]:
partial_data = data.iloc[:2, :]
processed_partial_data = pd.DataFrame(create_threads_for_rows(partial_data))
processed_partial_data

Unnamed: 0,Id,Date,Details,Summary
0,1,2014-09-14,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2924}..."
1,1,2014-09-15,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2430}..."


In [70]:
def parse_data(row):
  name_lst = []
  nutrishes_name_lst = []
  nutrishes_value_lst = []
  total = []
  goal = []

  id = row.iloc[:,0].values[0]
  date = row.iloc[:,1].values[0]
  meal = row.iloc[:,2][0][0].get('meal')
  detail_len = len(row.iloc[:,2][0][0].get('dishes'))

  for i in range(detail_len):
    for j in range(len(row.iloc[:,2][0][0].get('dishes')[0].get('nutritions'))):
      nutrishes_name = row.iloc[:,2][0][0].get('dishes')[i].get('nutritions')[j].get('name')
      nutrishes_value = row.iloc[:,2][0][0].get('dishes')[i].get('nutritions')[j].get('value')
      nutrishes_name_lst.append(nutrishes_name)
      nutrishes_value_lst.append(nutrishes_value)
      name = row.iloc[:,2][0][0].get('dishes')[i].get('name')
      name_lst.append(name)


  total_caloriess = row.iloc[:,3].values[0].get('total')[0].get('value')
  total_carbs = row.iloc[:,3].values[0].get('total')[1].get('value')
  total_fats = row.iloc[:,3].values[0].get('total')[2].get('value')
  total_proteins = row.iloc[:,3].values[0].get('total')[3].get('value')
  total_sodiums = row.iloc[:,3].values[0].get('total')[4].get('value')
  total_sugars = row.iloc[:,3].values[0].get('total')[5].get('value')

  goal_caloriess = row.iloc[:,3].values[0].get('goal')[0].get('value')
  goal_carbs = row.iloc[:,3].values[0].get('goal')[1].get('value')
  goal_fats = row.iloc[:,3].values[0].get('goal')[2].get('value')
  goal_proteins = row.iloc[:,3].values[0].get('goal')[3].get('value')
  goal_sodiums = row.iloc[:,3].values[0].get('goal')[4].get('value')
  goal_sugars = row.iloc[:,3].values[0].get('goal')[5].get('value')

  sequence = row.iloc[:,2].values[0][0].get('sequence')
  len_name = len(nutrishes_name_lst)
  len_value = len(nutrishes_value_lst)
  id_lst = [id] * len(nutrishes_name_lst)

  date_lst = [date] * len(nutrishes_name_lst)
  meal_lst = [meal] * len(nutrishes_name_lst)
  sequence_lst = [sequence] * len(nutrishes_name_lst)
  total_caloriess_lst = [total_caloriess] * len(nutrishes_name_lst)
  total_carbs_lst = [total_carbs] * len(nutrishes_name_lst)
  total_fats_lst = [total_fats] * len(nutrishes_name_lst)
  total_proteins_lst = [total_proteins] * len(nutrishes_name_lst)
  total_sodiums_lst = [total_sodiums] * len(nutrishes_name_lst)
  total_sugars_lst = [total_sugars] * len(nutrishes_name_lst)

  goal_caloriess_lst = [goal_caloriess] * len(nutrishes_name_lst)
  goal_carbs_lst = [goal_carbs] * len(nutrishes_name_lst)
  goal_fats_lst = [goal_fats] * len(nutrishes_name_lst)
  goal_proteins_lst = [goal_proteins] * len(nutrishes_name_lst)
  goal_sodiums_lst = [goal_sodiums] * len(nutrishes_name_lst)
  goal_sugars_lst = [goal_sugars] * len(nutrishes_name_lst)


  return pd.DataFrame({
       'Id': id_lst,
       'Date': date_lst,
       'Meal': meal_lst,
       'Nutrition_Name': nutrishes_name_lst,
       'Value': nutrishes_value_lst,
       'Name': name_lst,
       'Sequence': sequence_lst,
       'Total_Calories': total_caloriess_lst,
       'Total_Carbs': total_carbs_lst,
       'Total_Fats': total_fats_lst,
       'Total_Protein': total_proteins_lst,
       'Total_Sodium': total_sodiums_lst,
       'Total_Sugar': total_sugars_lst,
       'Goal_Calories': goal_caloriess_lst,
       'Goal_Carbs': goal_carbs_lst,
       'Goal_Fats': goal_fats_lst,
       'Goal_Protein': goal_proteins_lst,
       'Goal_Sodium': goal_sodiums_lst,
       'Goal_Sugar': goal_sugars_lst
       })

In [72]:
parse_data(pd.DataFrame(processed_partial_data.iloc[0,:]).T)

Unnamed: 0,Id,Date,Meal,Nutrition_Name,Value,Name,Sequence,Total_Calories,Total_Carbs,Total_Fats,Total_Protein,Total_Sodium,Total_Sugar,Goal_Calories,Goal_Carbs,Goal_Fats,Goal_Protein,Goal_Sodium,Goal_Sugar
0,1,2014-09-14,MY food,Calories,412,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
1,1,2014-09-14,MY food,Carbs,29,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
2,1,2014-09-14,MY food,Fat,24,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
3,1,2014-09-14,MY food,Protein,21,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
4,1,2014-09-14,MY food,Sodium,258,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,1,2014-09-14,MY food,Carbs,15,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
62,1,2014-09-14,MY food,Fat,10,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
63,1,2014-09-14,MY food,Protein,10,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
64,1,2014-09-14,MY food,Sodium,176,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119


In [65]:
pd.DataFrame(processed_partial_data.iloc[0,:]).T

Unnamed: 0,Id,Date,Details,Summary
0,1,2014-09-14,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2924}..."


In [66]:
def create_threads_parse_rows(data):
    threads = []
    results = []

    def thread_function(index, row):
        processed_row = parse_data(row)
        results.append(processed_row)

    num_rows = data.shape[0]
    for i in range(0, num_rows):
        row = pd.DataFrame(data.iloc[i, :]).T

        thread = threading.Thread(target=thread_function, args=(i, row))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()
    final = pd.concat(results, ignore_index=True)
    return final

In [67]:
parsed_data = create_threads_parse_rows(pd.DataFrame(processed_partial_data))
parsed_data

Exception in thread Thread-739 (thread_function):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py", line 3653, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas/_libs/index.pyx", line 147, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 176, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 2606, in pandas._libs.hashtable.Int64HashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 2630, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-66-75e143614179>", line 6, in thread_functi

Unnamed: 0,Id,Date,Meal,Nutrition_Name,Value,Name,Sequence,Total_Calories,Total_Carbs,Total_Fats,Total_Protein,Total_Sodium,Total_Sugar,Goal_Calories,Goal_Carbs,Goal_Fats,Goal_Protein,Goal_Sodium,Goal_Sugar
0,1,2014-09-14,MY food,Calories,412,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
1,1,2014-09-14,MY food,Carbs,29,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
2,1,2014-09-14,MY food,Fat,24,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
3,1,2014-09-14,MY food,Protein,21,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
4,1,2014-09-14,MY food,Sodium,258,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,1,2014-09-14,MY food,Carbs,15,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
62,1,2014-09-14,MY food,Fat,10,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
63,1,2014-09-14,MY food,Protein,10,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
64,1,2014-09-14,MY food,Sodium,176,"Tasti - Salted Caramel Protein Bar, 40 g",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119


In [11]:
processed_partial_data

Unnamed: 0,Id,Date,Details,Summary
0,1,2014-09-14,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2924}..."
1,1,2014-09-15,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2430}..."
2,1,2014-09-16,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 1862}..."
3,1,2014-09-17,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2251}..."
4,1,2014-09-18,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2001}..."
5,1,2014-09-19,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2158}..."
6,1,2014-09-20,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2691}..."
7,1,2014-09-21,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2524}..."
8,1,2014-09-22,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2182}..."
9,1,2014-09-23,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2443}..."
