In [1]:
# import sys
# !conda install --yes --prefix {sys.prefix} pingouin

In [2]:
import numpy as np
import math
import matplotlib.pyplot as plt
import os
import pandas as pd
import pingouin as pg
import seaborn as sns
import scipy
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm 
import statsmodels.formula.api as smf
import statsmodels.graphics.api as smg
import sys

if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO
    
from IPython.display import HTML
def View(df):
    css = """<style>
    table { border-collapse: collapse; border: 3px solid #eee; }
    table tr th:first-child { background-color: #eeeeee; color: #333; font-weight: bold }
    table thead th { background-color: #eee; color: #000; }
    tr, th, td { border: 1px solid #ccc; border-width: 1px 0 0 1px; border-collapse: collapse;
    padding: 3px; font-family: monospace; font-size: 10px }</style>
    """
    s  = '<script type="text/Javascript">'
    s += 'var win = window.open("", "Title", "toolbar=no, location=no, directories=no, status=no, menubar=no, scrollbars=yes, resizable=yes, width=780, height=200, top="+(screen.height-400)+", left="+(screen.width-840));'
    s += 'win.document.body.innerHTML = \'' + (df.to_html() + css).replace("\n",'\\') + '\';'
    s += '</script>'
    return(HTML(s+css))    
    
print("Current Working directory " , os.getcwd())

Current Working directory  C:\Users\User\GitHub\WebET_Analysis


In [3]:
# from IPython.display import HTML

# HTML('''<script>
# code_show=true; 
# function code_toggle() {
#  if (code_show){
#  $('div.input').hide();
#  } else {
#  $('div.input').show();
#  }
#  code_show = !code_show
# } 
# $( document ).ready(code_toggle);
# </script>
# <form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

# Read data

In [4]:
data_et_fixCal = pd.read_csv(r'C:/Users/User/GitHub/WebET_Analysis/data_jupyter/data_et_fixCal.csv')
data_trial_fixCal = pd.read_csv(r'C:/Users/User/GitHub/WebET_Analysis/data_jupyter/data_trial_fixCal.csv')
data_subject = pd.read_csv(r'C:/Users/User/GitHub/WebET_Analysis/data_jupyter/data_subject.csv')

overview = pd.DataFrame(
    [
        [len(data_et_fixCal)],
        [len(data_trial_fixCal)],
        [len(data_subject)]
    ], 
    columns=['length'],
    index=[
        'data_et_fixCal', 
        'data_trial_fixCal', 
        'data_subject'
          ]
)
print(overview)

                   length
data_et_fixCal     389064
data_trial_fixCal    9556
data_subject           51


# Clean

In [5]:
NTrials = data_trial_fixCal.groupby(['run_id'])['trial_index'].max() \
    .reset_index()
subjects_not_enough_trials = NTrials.loc[NTrials['trial_index']<400, 'run_id']
print('These subjects do not have enough trials: \n' + 
      str(NTrials.loc[NTrials['trial_index']<400, :])
     )

These subjects do not have enough trials: 
    run_id  trial_index
27      61        271.0


In [6]:
cleanSubjects = data_subject.loc[
        (data_subject['status']=='APPROVED') &
        (data_subject['keptHead']!=0) &
        (data_subject['triedChin']!=0) &
        pd.notna(data_subject['fps']) &
        ~(data_subject['run_id'].isin(subjects_not_enough_trials)), 
        'run_id'
    ]
cleanSubjects.unique()

KeyError: 'fps'

In [None]:
print('data_trial_fixCal raw: ' + str(len(data_trial_fixCal)))
data_trial_fixCal = data_trial_fixCal.loc[
    data_trial_fixCal['run_id'].isin(cleanSubjects),
    :
]
print('data_trial_fixCal cleaned: ' + str(len(data_trial_fixCal)))

In [None]:
print('data_et_fixCal raw: ' + str(len(data_et_fixCal)))
data_et_fixCal = data_et_fixCal.loc[
    (data_et_fixCal['x']>0) &
    (data_et_fixCal['x']<1) &
    (data_et_fixCal['y']>0) &
    (data_et_fixCal['y']<1) &
    (data_et_fixCal['t_task']>1000) &
    (data_et_fixCal['run_id'].isin(cleanSubjects)), 
    :
]
print('data_et_fixCal cleaned: ' + str(len(data_et_fixCal)))

In [None]:
print('data_subject raw: ' + str(len(data_subject)))
data_subject = data_subject.loc[data_subject['run_id'].isin(cleanSubjects), :]
print('data_subject cleaned: ' + str(len(data_subject)))
data_subject['webcam_fps'] = np.round(data_subject['webcam_fps'])

# Add variables

## Offset

In [None]:
def euclideanDistance(x, x_target, y, y_target):
    x_diff = x - x_target
    y_diff = y - y_target
    euclideanDistance = np.sqrt(x_diff**2 + y_diff**2)
    return(euclideanDistance)

data_et_fixCal["offset"] = euclideanDistance(
    data_et_fixCal["x"], data_et_fixCal['x_pos'],
    data_et_fixCal["y"], data_et_fixCal['y_pos'])

In [None]:
def merge_mean_by_index(data, largeData, varName):
    if (varName + '_mean' in data.columns):
        print(varName + '_mean already added!')
    else:
        grouped = largeData.groupby(['run_id', 'trial_index'])[varName].mean() \
            .reset_index() \
            .rename(columns={varName: varName + '_mean'})
        
        data = data.merge(grouped, on=['run_id', 'trial_index'], how='left') 
    return data

data_trial_fixCal = merge_mean_by_index(data_trial_fixCal, data_et_fixCal, 'offset')
data_trial_fixCal['offset_mean'].describe()

In [None]:
data_subject = data_subject.merge(
    data_trial_fixCal.groupby(['run_id'])['offset_mean'].mean() \
        .reset_index(),
    on='run_id',
    how='left'
)
data_subject['offset_mean'].describe()

In [None]:
grouped = data_trial_fixCal.groupby(['run_id', 'chin'])['offset_mean'].mean() \
    .reset_index()
grouped = grouped.pivot(index='run_id', columns='chin', values='offset_mean') \
    .reset_index() \
    .rename(columns={0.0: 'offset_chin_0', 1.0: 'offset_chin_1'})

data_subject = data_subject.merge(grouped, on='run_id', how='left')

## Precision

In [None]:
data_et_fixCal = merge_mean_by_index(data_et_fixCal, data_et_fixCal, 'x')
data_et_fixCal = merge_mean_by_index(data_et_fixCal, data_et_fixCal, 'y')

In [None]:
def addPrecision(data):
    if 'precision' in data.columns: 
        print('precision already added!')
    else:
        data['deviationFromAVG'] = euclideanDistance(
                data['x'], data['x_mean'], data['y'], data['y_mean']
            )
        data['deviationFromAVG_square'] = np.power(data['deviationFromAVG'], 2)
        grouped = data.groupby(['run_id', 'trial_index']).mean() \
            .reset_index()
        grouped['precision'] = np.sqrt(grouped['deviationFromAVG_square'])
        data = data.merge(
                grouped.loc[:, ['run_id', 'trial_index', 'precision']], 
                on=['run_id', 'trial_index'],
                how='left'
            )
                                
    return data

data_et_fixCal = addPrecision(data_et_fixCal)
data_et_fixCal['precision'].describe()

In [None]:
data_trial_fixCal = merge_mean_by_index(data_trial_fixCal, data_et_fixCal, 'precision')

In [None]:
data_subject = data_subject.merge(
    data_trial_fixCal.groupby(['run_id'])['precision_mean'].mean() \
        .reset_index(),
    on='run_id',
    how='left'
)

In [None]:
grouped = data_trial_fixCal.groupby(['run_id', 'chin'])['precision_mean'].mean() \
    .reset_index()
grouped = grouped.pivot(index='run_id', columns='chin', values='precision_mean') \
    .reset_index() \
    .rename(columns={0.0: 'precision_chin_0', 1.0: 'precision_chin_1'})

data_subject = data_subject.merge(grouped, on='run_id', how='left')

# datasets for fixation task

In [None]:
data_trial_fix = data_trial_fixCal.loc[
    (data_trial_fixCal['trial_type']=='eyetracking-fix-object') &
    (data_trial_fixCal['trial_duration']==5000), 
    :
]
data_trial_fix.columns

In [None]:
temp = data_et_fixCal.merge(
    data_trial_fixCal.loc[:, ['run_id', 'trial_index', 'trial_duration']],
    on=['run_id', 'trial_index'],
    how='left'
)

data_et_fix = temp.loc[
    (temp['trial_type']=='eyetracking-fix-object') &
    (temp['trial_duration']==5000) & 
    (temp['run_id'].isin(data_trial_fixCal['run_id'].unique())), 
    :
]
data_et_fix.columns

# Demographics

In [None]:
for col in ['Nationality', 'Employment Status', 'webcam_fps']:
    print(pd.crosstab(index=data_subject[col],
                      columns="count")
         )
    print('\n')

# Manipulation Check

## Followed the instructions

In [None]:
data_trial_fixCal['run_id'].unique()

In [None]:
data_subject.loc[
    (data_subject['keptHead']==0) |
    (data_subject['triedChin']==0), 
    ['run_id', 'prolificID', 'keptHead', 'triedChin']
]

# Gaze predictions for each position

In [None]:
data_plot = data_et_fix.merge(data_trial_fix.loc[:, ['run_id', 'trial_index', 'chin']], 
                             on=['run_id', 'trial_index'], 
                             how='left')

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(18, 12))
axes = axes.ravel()
xPos = [0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8]
yPos = [0.2, 0.2, 0.2, 0.5, 0.5, 0.5, 0.8, 0.8, 0.8]

for i in range(0, 9):
    axes_data = data_plot.loc[
        (data_plot['run_id']==43) & 
        (data_plot['chin']==0) &
        (data_plot['x_pos']==xPos[i]) & 
        (data_plot['y_pos']==yPos[i]), 
        :]
    im = axes[i].scatter(
        axes_data['x'], 
        axes_data['y'], 
        c=axes_data['t_task'], 
        cmap='viridis'
    )
    axes[i].set_ylim(1, 0)
    axes[i].set_xlim(0, 1)
    
fig.colorbar(im, ax=axes)
plt.show()

# fps across subjects and trials

In [None]:
plt.hist(data_subject['fps'], bins=15)

In [None]:
def spaghettiPlot(data, xVar, yVar):
    highlightedSubject = data['run_id'].unique()[0]
    plt.style.use('seaborn-darkgrid')
    fig, ax = plt.subplots(figsize=(15, 10))

    # Draw Plots
    for subject in data["run_id"].unique():
        df_thisSubject = data[data['run_id']==subject]
        ax.plot(data.loc[data['run_id']==subject, xVar], 
                data.loc[data['run_id']==subject, yVar], 
                marker='', color='grey', linewidth=1, alpha=0.4)

    # Highlight Subject 
    ax.plot(data.loc[data['run_id']==highlightedSubject, xVar], 
            data.loc[data['run_id']==highlightedSubject, yVar], 
            marker='', color='orange', linewidth=4, alpha=0.7)

    # Let's annotate the plot
    for subject in data["run_id"].unique():
        if subject != highlightedSubject:
            ax.text(data.loc[data['run_id']==subject, xVar].max()+1, 
                    data.loc[data['run_id']==subject, yVar].tail(1),
                    s=subject, horizontalalignment='left', size='small', color='grey')

        else:
            ax.text(data.loc[data['run_id']==subject, xVar].max()+1, 
                    data.loc[data['run_id']==subject, yVar].tail(1),
                    s=subject, horizontalalignment='left', size='small', color='orange')
    return plt

spaghettiPlot(
    data_trial_fixCal.loc[data_trial_fixCal['chinFirst']==0, :],     
    'trial_index', 
    'fps')
plt.title('chinFirst == 0', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('trial_index')
plt.ylabel('fps')
plt.vlines(18, 45, 50, colors='k', linestyles='solid')
plt.text(18+1, 50, s='Calibration')
plt.vlines(105, 45, 50, colors='k', linestyles='solid')
plt.text(105+1, 50, s='fix Task')
plt.vlines(143, 45, 50, colors='k', linestyles='solid')
plt.text(143+1, 50, s='Calibration')
plt.vlines(230, 45, 50, colors='k', linestyles='solid')
plt.text(230+1, 50, s='fix Task')
plt.vlines(269, 45, 50, colors='k', linestyles='solid')
plt.text(269+1, 50, s='choice Task')
plt.show()

In [None]:
spaghettiPlot(
    data_trial_fixCal.loc[data_trial_fixCal['chinFirst']==1, :],     
    'trial_index', 
    'fps')
plt.title('chinFirst == 1', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('trial_index')
plt.ylabel('fps')

plt.vlines(18, 45, 50, colors='k', linestyles='solid')
plt.text(18+1, 50, s='Calibration')
plt.vlines(105, 45, 50, colors='k', linestyles='solid')
plt.text(105+1, 50, s='fix Task')
plt.vlines(144, 45, 50, colors='k', linestyles='solid')
plt.text(144+1, 50, s='choice Task')
plt.vlines(394, 45, 50, colors='k', linestyles='solid')
plt.text(394+1, 50, s='Calibration')
plt.vlines(482, 45, 50, colors='k', linestyles='solid')
plt.text(482+1, 50, s='fix Task')

plt.show()

# Chin-rest effect

In [None]:
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15,6))
fig.suptitle('chinFirst==0 vs. chinFirst==1')

sns.boxplot(ax=axes[0], x='chin', y='offset_mean', data=data_trial_fix)
sns.boxplot(ax=axes[1], x='chin', y='precision_mean', data=data_trial_fix)
   
plt.show()

In [None]:
means_0 = data_subject.loc[:, ['offset_chin_0', 'offset_chin_1', 'precision_chin_0', 'precision_chin_1']].mean() \
    .reset_index() \
    .rename(columns={0: 'mean'})
var_0 = data_subject.loc[:, ['offset_chin_0', 'offset_chin_1', 'precision_chin_0', 'precision_chin_1']].var() \
    .reset_index() \
    .rename(columns={0: 'var'})
means_0.merge(var_0, on='index', how='left')

In [None]:
scipy.stats.ttest_rel(data_subject['offset_chin_0'], data_subject['offset_chin_1'])

## Effect for various positions. Overall slightly lower accuracy

In [None]:
grouped_var = data_trial_fix.groupby(['positionIndex', 'chin']) \
    ['offset_mean', 'precision_mean'].var() \
    .rename(columns={'offset_mean': 'offset_var', 'precision_mean':'precision_var'}) \
    .reset_index()

grouped_means = data_trial_fix.groupby(['positionIndex', 'chin']) \
    ['offset_mean', 'precision_mean'].mean() \
    .reset_index()

grouped = grouped_means.merge(grouped_var, on=['positionIndex', 'chin'], how='left')

grouped_offset_mean = grouped.loc[:, ['positionIndex', 'chin', 'offset_mean']] \
    .set_index(['positionIndex']) \
    .pivot(columns="chin")['offset_mean'] \
    .reset_index() \
    .rename(columns={0.0: 'offset_mean_chin_0', 1.0: 'offset_mean_chin_1'}) 

grouped_offset_var = grouped.loc[:, ['positionIndex', 'chin', 'offset_var']] \
    .set_index(['positionIndex']) \
    .pivot(columns="chin")['offset_var'] \
    .reset_index() \
    .rename(columns={0.0: 'offset_var_chin_0', 1.0: 'offset_var_chin_1'}) 

grouped_precision_mean = grouped.loc[:, ['positionIndex', 'chin', 'precision_mean']] \
    .set_index(['positionIndex']) \
    .pivot(columns="chin")['precision_mean'] \
    .reset_index() \
    .rename(columns={0.0: 'precision_mean_chin_0', 1.0: 'precision_mean_chin_1'}) 

grouped_precision_var = grouped.loc[:, ['positionIndex', 'chin', 'precision_var']] \
    .set_index(['positionIndex']) \
    .pivot(columns="chin")['precision_var'] \
    .reset_index() \
    .rename(columns={0.0: 'precision_var_chin_0', 1.0: 'precision_var_chin_1'}) 

overview = pd.concat(
    [
        grouped_offset_mean,
        grouped_offset_var.iloc[:, 1:3],
        grouped_precision_mean.iloc[:, 1:3], 
        grouped_precision_var.iloc[:, 1:3]
    ], 
    axis=1
)
print(overview.mean())
overview

# Offset

## offset ~ chin-rest across chinFirst

In [None]:
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15,6))
fig.suptitle('chinFirst==0 vs. chinFirst==1')

sns.boxplot(ax=axes[0], x='chin', y='offset_mean', 
            data=data_trial_fix.loc[data_trial_fix['chinFirst']==0, :])
sns.boxplot(ax=axes[1], x='chin', y='offset_mean', 
            data=data_trial_fix.loc[data_trial_fix['chinFirst']==1, :])
   
plt.show()

## offset across calibration

In [None]:
spaghettiPlot(
    data_trial_fixCal.loc[
        (data_trial_fixCal['trial_type']=='eyetracking-calibration') &
        (data_trial_fixCal['chin']==0),
         :
    ], 
    'withinTaskIndex', 
    'offset_mean'
)
plt.ylim(0, 1)
plt.title('Offset across calibration for chin==0', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('withinTaskIndex')
plt.ylabel('Offset')

plt.show()

In [None]:
spaghettiPlot(
    data_trial_fixCal.loc[
        (data_trial_fixCal['trial_type']=='eyetracking-calibration') &
        (data_trial_fixCal['chin']==1),
         :
    ], 
    'withinTaskIndex', 
    'offset_mean'
)
plt.ylim(0, 1)
plt.title('Offset across calibration for chin==1', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('withinTaskIndex')
plt.ylabel('Offset')

plt.show()

# Precision

## precision ~ chin-rest across chinFirst

In [None]:
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15,6))
fig.suptitle('chinFirst==0 vs. chinFirst==1')

sns.boxplot(ax=axes[0], x='chin', y='precision_mean', 
            data=data_trial_fix.loc[data_trial_fix['chinFirst']==0, :])
sns.boxplot(ax=axes[1], x='chin', y='precision_mean', 
            data=data_trial_fix.loc[data_trial_fix['chinFirst']==1, :])
   
plt.show()

## Precision across calibration

In [None]:
spaghettiPlot(
    data_trial_fixCal.loc[
        (data_trial_fixCal['trial_type']=='eyetracking-calibration') &
        (data_trial_fixCal['chin']==0),
         :
    ], 
    'withinTaskIndex', 
    'precision_mean'
)
plt.ylim(0, 1)
plt.title('Precision across calibration for chin==0', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('withinTaskIndex')
plt.ylabel('Precision')

plt.show()

In [None]:
spaghettiPlot(
    data_trial_fixCal.loc[
        (data_trial_fixCal['trial_type']=='eyetracking-calibration') &
        (data_trial_fixCal['chin']==1),
         :
    ], 
    'withinTaskIndex', 
    'precision_mean'
)
plt.ylim(0, 1)
plt.title('Precision across calibration for chin==1', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('withinTaskIndex')
plt.ylabel('Precision')

plt.show()

In [None]:
spaghettiPlot(
    data_trial_fix.loc[
        (data_trial_fix['chinFirst']==1) &
        (data_trial_fix['chin']==0), 
        :],     
    'withinTaskIndex', 
    'offset_mean')
plt.title('chinFirst == 1', loc='center', fontsize=12, fontweight=0, color='grey')
plt.xlabel('trial_index')
plt.ylabel('offset_mean')

In [None]:
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15,6))
fig.suptitle('chinFirst==0 vs. chinFirst==1')

sns.boxplot(ax=axes[0], x='chin', y='precision', 
            data=data_trial_fixation.loc[data_trial_fixation['chinFirst']==0, :])
sns.boxplot(ax=axes[1], x='chin', y='precision', 
            data=data_trial_fixation.loc[data_trial_fixation['chinFirst']==1, :])
   
plt.show()

In [None]:
data_et_fixation['run_id'].unique()

## Chin rest condition

In [None]:
fixation = data_trial.loc[(data_trial['trial_type']=='eyetracking-fix-object') &
               (pd.notna(data_trial['withinTaskIndex'])), :] \
    .groupby['run_id', 'trial_index']['offset'].mean()
    .reset_index(drop=True) 
len(fixation.loc[fixation['chin']==1])

In [None]:
data = data_trial_fixation.loc[data_trial_fixation['run_id'] == 0, :]
predictor = 'chin'
outcome = 'offset'
sample_A = data.loc[data[predictor] == 0, ['trial_index', 'offset']] \
    .drop_duplicates() \
    .reset_index(drop=True)

View(sample_A)

## H1, H2, H4

### Data Cleaning

In [None]:
# Missing values
## Row-wise
# print(df.isnull().values.sum())
## Column-wise
# print(df.isnull().sum())

In [None]:
data_trial_choice_clean = data_trial_choice \
    .loc[(data_trial_choice['trial_duration_exact']<10000), :]

In [None]:
fig, axes = plt.subplots(1, 4, sharey=True, figsize=(15,6))
fig.suptitle('Subject properties vs. choice outcomes')

predictors = [
                'gender',
                'ethnic', 
                'degree',
                'chinFirst', 
             ]  
for i in range(0, len(predictors)):
    sns.boxplot(ax=axes[i], x=predictors[i], y='choseLL', data=data_subject)
   
plt.show()

In [None]:
# window_diagonal_max
# age
# choiceNr
def bubblePlot(categorical_variable, distinct_variable, data):
    
    sns.set_context("talk", font_scale=1.1)
    plt.figure(figsize=(5,6))
    
    data_bubblePlot = data.loc[:, [categorical_variable, distinct_variable]]
    grouped = data_bubblePlot.groupby([distinct_variable]) \
        .count() \
        .rename(columns={categorical_variable: 'n'}) \
        .reset_index()
    data_bubblePlot = data_bubblePlot.merge(grouped, 
                                                 on=distinct_variable, 
                                                 how='left')
    data_bubblePlot['n'] = data_bubblePlot['n']     
    
    sns.scatterplot(x=categorical_variable, 
                    y=distinct_variable,
                    size='n',            
                    data=data_bubblePlot)
    plt.legend(bbox_to_anchor=(1.01, 1),borderaxespad=0)
    plt.xlabel(categorical_variable)
    plt.xticks([0, 1])
    plt.ylabel(distinct_variable)
    plt.tight_layout()
    plt.show()

bubblePlot(categorical_variable = 'choseLL', 
           distinct_variable = 'age', 
           data = data_trial_choice)

### Correlation

In [None]:
correlation_columns = [
                            'trial_duration_exact',
                            'fps',
                            'k', 
                            'choiceNr',
                            'optionIndex',
                            'attributeIndex', 
                            'payneIndex',
                        ]

### Plot

In [None]:
sns.pairplot(data_trial_choice_clean.loc[:,
        np.append(correlation_columns, ['LL_top'])
    ],
    hue='LL_top')

In [None]:
sns.pairplot(data_trial_choice_clean.loc[:,
        np.append(correlation_columns, ['choseTop'])
    ],
    hue='choseTop')

In [None]:
corr_matrix = np.corrcoef(data_trial_choice_clean[correlation_columns].T)
smg.plot_corr(corr_matrix, xnames=correlation_columns)
plt.show()

In [None]:
data_trial_choice_clean[correlation_columns].rcorr()

In [None]:
data_trial_choice_clean[correlation_columns].rcorr(upper='n')

### 4.1.1 Requirements

In [None]:
X = data_trial_choice[["run_id", "choiceNr", "optionIndex", 'attributeIndex', 'payneIndex']]
y = 1-data_trial_choice[["choseLL"]]  

In [None]:
def noMissingValues(data):
    null_data = X.loc[X.isnull().any(axis=1), :]
    if len(null_data) > 0:
        print('There are some missing data. Look here: \n \n')
        print(null_data)
        return(False)
    else:
        print('No missing values!')
        return(True)

In [None]:
noMissingValues(X)