# Food: a relevant marker of social inequalities?

In [8]:
# Imports
from urllib.request import urlopen
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np

import warnings
# Uncomment to hide Warnings
#warnings.filterwarnings('ignore')

import os
import pyspark
from pyspark.sql import *
from pyspark.sql.functions import *

## Study of OpenFoodFact

### Data cleaning

In [6]:
# Data
# URL template for the Open Food Facts site: "{}" is a placeholder for the
# resource path and the ".json" suffix is appended to it.
OFF_API_PATH = "https://world.openfoodfacts.org/{}.json"
# URL template for the search endpoint: "{}" sits at the start of the query
# string (presumably for the search criteria); the fixed parameters ask for
# 1000 results per page, returned as JSON.
OFF_API_QUERY_PATH = "https://world.openfoodfacts.org/cgi/search.pl?{}&page_size=1000&action=process&json=1"

### Data formatting

### Data analysis and visualization

## Study of INCA 2

### Data cleaning

In [7]:
# Data
import platform

# The INCA 2 tables are read with a legacy single-byte codec chosen from the
# host operating system.
plat = platform.system()
print(plat)
if plat == "Windows":
    # "ansi" is only available on Windows, where it decodes with the system
    # code page.
    encoding = "ansi"
else:
    # macOS, Linux and every other platform: "ansi" does not exist there, so
    # use Latin-1 instead of refusing to run.
    encoding = "latin"

DATA_PATH = "data/{}.csv"


def _read_table(name):
    """
    Loads data/<name>.csv as a DataFrame (semicolon separated, OS-dependent encoding).
    """
    return pd.read_csv(DATA_PATH.format(name), sep=";", encoding=encoding)


dfConso = _read_table("Table_conso")
dfIndiv = _read_table("Table_indiv")
dfCapiCA = _read_table("Table_capi_ca")
dfCarnetCA = _read_table("Table_carnet_ca_1")
dfIndivCA = _read_table("Table_indiv_ca")
dfIndnut = _read_table("Table_indnut")
dfMenage = _read_table("Table_menage_1")
dfRepas = _read_table("Table_repas")
dfNomenclature = _read_table("Nomenclature_3")
dfCorrespondance = _read_table("correspondance_reponses")
dfDataNames = _read_table("Data_names_all")

Windows


### Data formatting

In [None]:
def findCorrespondance(val, column):
    """
    Returns the meaning of the coded value val for the given column.

    The lookup is done in the dfCorrespondance table, matching column against
    "Nom de la variable" and val against "code", and returns the first
    matching "Signification".

    val: coded answer to translate.
    column: name of the variable the code belongs to.

    Returns val unchanged when the table holds no entry for this pair.
    """
    # The correspondence table is loaded under the name dfCorrespondance.
    matches = dfCorrespondance.loc[
        (dfCorrespondance["Nom de la variable"] == column)
        & (dfCorrespondance["code"] == val),
        "Signification",
    ]
    if matches.empty:
        # Unknown code (or variable without codes): keep the raw value.
        return val
    return matches.iloc[0]

def mapCorrespondances(vals, column):
    """
    Translates every coded value of vals with findCorrespondance for the
    given column and returns the results as a list, in the same order.
    """
    meanings = []
    for code in vals:
        meanings.append(findCorrespondance(code, column))
    return meanings

def findDescription(column):
    """
    Returns the description of the column signification.

    The description is read from the dfDataNames table: the row whose
    "Nom de la variable" equals column, field "Libellé de la variable".

    column: name of the variable to describe.

    Raises IndexError when dfDataNames has no row for column.
    """
    # The header is spelled with an accented "é" (U+00E9).
    rows = dfDataNames[dfDataNames["Nom de la variable"] == column]
    return rows["Libellé de la variable"].values[0]

def mapDescription(columns):
    """
    Looks up the description of each column name with findDescription and
    returns the descriptions as a list, in the same order.
    """
    return list(map(findDescription, columns))

### Data analysis and visualization

#### BMI study

#### Interest in food

#### Displaying functions

In [None]:
def plotBar(table, x, y, index=None):
    """
    Plots a stacked normalized bar plot from table.

    One bar is drawn per distinct value of column x. Each bar is split into
    one segment per distinct value of column y, and every bar is scaled so
    that its segments sum to 1. Bar and segment labels are translated with
    mapCorrespondances.

    table: DataFrame holding columns x and y (and index when given).
    x: name of the column shown on the horizontal axis.
    y: name of the column whose values form the stacked segments.
    index: optional name of the column whose non-null entries are counted in
        each (x, y) group. When omitted (or equal to x or y) every row of
        the group is counted.
    """
    if index in (None, x, y):
        # Selecting [x, y, y] would duplicate a column and break groupby, so
        # count the rows of each group directly.
        counts = table.groupby([x, y]).size().unstack(level=1)
    else:
        counts = table[[x, y, index]].groupby([x, y])[index].count().unstack(level=1)
    # Row-wise normalization: each x value (one bar) sums to 1.
    shares = counts.div(counts.sum(axis=1), axis=0)
    shares.index = mapCorrespondances(shares.index, x)
    shares.columns = mapCorrespondances(shares.columns, y)
    shares.plot.bar(stacked=True)
    plt.show()

def plotScatter(table, col1, col2):
    """
    Draws a scatter plot of table: col1 on the horizontal axis, col2 on the
    vertical axis, then shows the figure.
    """
    points = table[[col1, col2]].values
    xs, ys = points[:, 0], points[:, 1]
    plt.scatter(xs, ys)
    plt.show()
    
def getBinsFunc(x, nbins):
    """
    Creates a function mapping numerical values to the lower edge of their bin.

    The range of x, widened by 1% so that the maximum falls inside the last
    bin, is cut into nbins bins of equal width. The returned function accepts
    a scalar, an array or a Series and replaces each value by the lower edge
    of the bin containing it, which can then be used as a category.

    x: values defining the range to cut (array-like or Series).
    nbins: number of bins, at least 1.

    Raises ValueError when nbins is smaller than 1.
    """
    if nbins < 1:
        raise ValueError("nbins must be at least 1, got {}".format(nbins))
    xmin = np.min(x)
    # Pad relative to the range, not to the maximum itself, so the upper
    # bound stays above the data even when the maximum is zero or negative.
    span = (np.max(x) - xmin) * 1.01
    # A constant input has a zero range: any positive width puts every value
    # in the first bin, whose lower edge is xmin.
    width = span / nbins if span else 1.0
    return lambda v: np.floor((v - xmin) / width) * width + xmin
    
def plotScatterCateX(table, x, y, index, nbins):
    """
    Plots a stacked normalized bar plot of a numerical column y against x.

    Column y is first turned into categories with the function returned by
    getBinsFunc (nbins bins), then the result is drawn with plotBar.

    table: DataFrame holding columns x, y and index; it is left unmodified.
    x: name of the column shown on the horizontal axis.
    y: name of the numerical column to cut into bins.
    index: name of the column counted in each (x, y bin) group.
    nbins: number of bins used for y.
    """
    # Work on a copy: assigning into a slice of table triggers a
    # chained-assignment warning and may write into the caller's data.
    consos = table[[x, y, index]].copy()
    toBin = getBinsFunc(consos[y], nbins)
    consos[y] = toBin(consos[y])
    plotBar(consos, x, y, index)
    