In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import json
import threading
from concurrent.futures import ThreadPoolExecutor

In [3]:
data = pd.read_csv('/content/drive/MyDrive/Final Project/mfp-diaries.tsv', sep='\t', header=None)
data.head()

Unnamed: 0,0,1,2,3
0,1,2014-09-14,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2924}..."
1,1,2014-09-15,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2430}..."
2,1,2014-09-16,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 1862}..."
3,1,2014-09-17,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2251}..."
4,1,2014-09-18,"[{""meal"": ""MY food"", ""dishes"": [{""nutritions"":...","{""total"": [{""name"": ""Calories"", ""value"": 2001}..."


In [4]:
def convert_to_json(row):
    id = row[0]
    date = row[1]
    details = json.loads(row[2])
    summary = json.loads(row[3])
    return pd.DataFrame([[id, date, details, summary]], columns=['Id', 'Date', 'Details', 'Summary'])

In [5]:
convert_to_json(data.iloc[0])

Unnamed: 0,Id,Date,Details,Summary
0,1,2014-09-14,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2924}..."


In [6]:
def create_threads_for_rows(data):
    threads = []
    results = []

    def thread_function(index, row):
        processed_row = convert_to_json(row)
        results.append(processed_row)

    num_rows = data.shape[0]
    for i in range(num_rows):
        row = data.iloc[i, :]
        thread = threading.Thread(target=thread_function, args=(i, row))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    processed_data = pd.concat(results, ignore_index=True)
    return processed_data

In [58]:
partial_data = data.iloc[:10, :]
processed_partial_data = pd.DataFrame(create_threads_for_rows(partial_data))
processed_partial_data

Unnamed: 0,Id,Date,Details,Summary
0,1,2014-09-14,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2924}..."
1,1,2014-09-15,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2430}..."
2,1,2014-09-16,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 1862}..."
3,1,2014-09-17,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2251}..."
4,1,2014-09-18,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2001}..."
5,1,2014-09-19,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2158}..."
6,1,2014-09-20,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2691}..."
7,1,2014-09-22,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2182}..."
8,1,2014-09-21,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2524}..."
9,1,2014-09-23,"[{'meal': 'MY food', 'dishes': [{'nutritions':...","{'total': [{'name': 'Calories', 'value': 2443}..."


In [61]:
def parse_data(row):
    name_lst = []
    nutrishes_name_lst = []
    nutrishes_value_lst = []
    total = []
    goal = []

    id = row.iloc[:, 0].values[0]
    date = row.iloc[:, 1].values[0]
    meal_data = row.iloc[:, 2].values[0]
    meal = meal_data[0].get('meal')
    dishes = meal_data[0].get('dishes')
    detail_len = len(dishes)

    for i in range(detail_len):
        dish = dishes[i]
        for j in range(len(dish.get('nutritions'))):
            nutrition = dish.get('nutritions')[j]
            nutrishes_name = nutrition.get('name')
            nutrishes_value = nutrition.get('value')
            nutrishes_name_lst.append(nutrishes_name)
            nutrishes_value_lst.append(nutrishes_value)
            name = dish.get('name')
            name_lst.append(name)

    total_data = row.iloc[:, 3].values[0]
    total_caloriess = total_data.get('total')[0].get('value')
    total_carbs = total_data.get('total')[1].get('value')
    total_fats = total_data.get('total')[2].get('value')
    total_proteins = total_data.get('total')[3].get('value')
    total_sodiums = total_data.get('total')[4].get('value')
    total_sugars = total_data.get('total')[5].get('value')

    goal_caloriess = total_data.get('goal')[0].get('value')
    goal_carbs = total_data.get('goal')[1].get('value')
    goal_fats = total_data.get('goal')[2].get('value')
    goal_proteins = total_data.get('goal')[3].get('value')
    goal_sodiums = total_data.get('goal')[4].get('value')
    goal_sugars = total_data.get('goal')[5].get('value')

    sequence = meal_data[0].get('sequence')
    len_name = len(nutrishes_name_lst)
    len_value = len(nutrishes_value_lst)
    id_lst = [id] * len(nutrishes_name_lst)
    date_lst = [date] * len(nutrishes_name_lst)
    meal_lst = [meal] * len(nutrishes_name_lst)
    sequence_lst = [sequence] * len(nutrishes_name_lst)
    total_caloriess_lst = [total_caloriess] * len(nutrishes_name_lst)
    total_carbs_lst = [total_carbs] * len(nutrishes_name_lst)
    total_fats_lst = [total_fats] * len(nutrishes_name_lst)
    total_proteins_lst = [total_proteins] * len(nutrishes_name_lst)
    total_sodiums_lst = [total_sodiums] * len(nutrishes_name_lst)
    total_sugars_lst = [total_sugars] * len(nutrishes_name_lst)

    goal_caloriess_lst = [goal_caloriess] * len(nutrishes_name_lst)
    goal_carbs_lst = [goal_carbs] * len(nutrishes_name_lst)
    goal_fats_lst = [goal_fats] * len(nutrishes_name_lst)
    goal_proteins_lst = [goal_proteins] * len(nutrishes_name_lst)
    goal_sodiums_lst = [goal_sodiums] * len(nutrishes_name_lst)
    goal_sugars_lst = [goal_sugars] * len(nutrishes_name_lst)

    return pd.DataFrame({
        'Id': id_lst,
        'Date': date_lst,
        'Meal': meal_lst,
        'Nutrition_Name': nutrishes_name_lst,
        'Value': nutrishes_value_lst,
        'Name': name_lst,
        'Sequence': sequence_lst,
        'Total_Calories': total_caloriess_lst,
        'Total_Carbs': total_carbs_lst,
        'Total_Fats': total_fats_lst,
        'Total_Protein': total_proteins_lst,
        'Total_Sodium': total_sodiums_lst,
        'Total_Sugar': total_sugars_lst,
        'Goal_Calories': goal_caloriess_lst,
        'Goal_Carbs': goal_carbs_lst,
        'Goal_Fats': goal_fats_lst,
        'Goal_Protein': goal_proteins_lst,
        'Goal_Sodium': goal_sodiums_lst,
        'Goal_Sugar': goal_sugars_lst
    })


In [64]:
import threading
import queue

def create_threads_parse_rows(data):
    threads = []
    results_queue = queue.Queue()

    def thread_function(index, row):
        processed_row = parse_data(row)
        results_queue.put(processed_row)

    num_rows = data.shape[0]
    for i in range(num_rows):
        row = pd.DataFrame(data.iloc[i, :]).T
        thread = threading.Thread(target=thread_function, args=(i, row))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    results = []
    while not results_queue.empty():
        results.append(results_queue.get())

    final = pd.concat(results, ignore_index=True) if results else pd.DataFrame()
    return final


In [65]:
parsed_data = create_threads_parse_rows(pd.DataFrame(processed_partial_data))
parsed_data.sort_values( by=['Id'])

Unnamed: 0,Id,Date,Meal,Nutrition_Name,Value,Name,Sequence,Total_Calories,Total_Carbs,Total_Fats,Total_Protein,Total_Sodium,Total_Sugar,Goal_Calories,Goal_Carbs,Goal_Fats,Goal_Protein,Goal_Sodium,Goal_Sugar
0,1,2014-09-14,MY food,Calories,412,"my - McDonalds Espresso Pronto® Flat White, 2 ...",1,2924,340,114,186,3658,109,3173,396,105,160,2300,119
319,1,2014-09-20,MY food,Carbs,22,"Quest Bar - Cookies and Cream, 1 bar",1,2691,282,92,216,2623,134,2823,352,93,142,2300,106
318,1,2014-09-20,MY food,Calories,180,"Quest Bar - Cookies and Cream, 1 bar",1,2691,282,92,216,2623,134,2823,352,93,142,2300,106
317,1,2014-09-20,MY food,Sugar,2,"Coon - Light and Tasty Cheese Slices, 2 Slice",1,2691,282,92,216,2623,134,2823,352,93,142,2300,106
316,1,2014-09-20,MY food,Sodium,302,"Coon - Light and Tasty Cheese Slices, 2 Slice",1,2691,282,92,216,2623,134,2823,352,93,142,2300,106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149,1,2014-09-17,MY food,Sugar,34,"Pauls - Smarter White Milk, 600 ml",1,2251,187,60,98,1765,105,1685,210,56,85,2300,63
148,1,2014-09-17,MY food,Sodium,402,"Pauls - Smarter White Milk, 600 ml",1,2251,187,60,98,1765,105,1685,210,56,85,2300,63
147,1,2014-09-17,MY food,Protein,24,"Pauls - Smarter White Milk, 600 ml",1,2251,187,60,98,1765,105,1685,210,56,85,2300,63
159,1,2014-09-17,MY food,Protein,0,James Squire - Orchard Crush - Apple Cider (Ca...,1,2251,187,60,98,1765,105,1685,210,56,85,2300,63
