In [2]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
# Row labels (design fields) used as the index of the `results` table.
index = [
    'Graphic',
    'Industry',
    'UX',
    'Fashion',
]

In [4]:
# One column per design-process stage; each list holds one value per
# design field, in the same order as `index`.
stage_figures = {
    'Research': [560, 271, 544, 1315],
    'Ideation': [738, 451, 163, 554],
    'Mock-up': [25, 120, 245, 35],
    'Production': [3232, 3697, 1687, 8724],
    'Evaluation': [305, 1232, 2667, 0],
}
results = pd.DataFrame(stage_figures, index=index)

# Q1. AI is predominantly applied in the later stages of the design process, with relatively fewer mentions in the initial stages.
## Independent two-sample t-test
### Hypothesis: Group1(Production + Evaluation) > Group2(Research + Ideation)
### Results: p-value = 0.034 (Welch's t-test, used because Levene's test rejects equal variances, p = 0.027)

In [6]:
# Group_1: the Production values followed by the Evaluation values of `results`
# (each in Graphic, Industry, UX, Fashion order).
later_stage_values = [3232, 3697, 1687, 8724, 305, 1232, 2667, 0]
# Group_2: the Research values followed by the Ideation values, same field order.
early_stage_values = [560, 271, 544, 1315, 738, 451, 163, 554]

q1 = pd.DataFrame({
    'Group_1': later_stage_values,
    'Group_2': early_stage_values,
})

In [7]:
# Visual Python: Student's t-test
# Independent two-sample t-test (one-sided: Group_1 greater than Group_2)
from IPython.display import display, Markdown
from scipy import stats

# Work on NaN-free copies of the two columns so q1 itself is left untouched.
vp_df1 = q1['Group_1'].dropna().copy()
vp_df2 = q1['Group_2'].dropna().copy()
_variable_labels = ['Variable1', 'Variable2']
_samples = (vp_df1, vp_df2)

# Normality test (Shapiro-Wilk), run separately on each sample
_res1 = stats.shapiro(vp_df1)
_res2 = stats.shapiro(vp_df2)
_normality_table = pd.DataFrame(
    data={
        'Statistic': [_res1.statistic, _res2.statistic],
        'p-value': [_res1.pvalue, _res2.pvalue],
    },
    index=[['Normality test (Shapiro-Wilk)'] * 2, _variable_labels],
)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(_normality_table)

# Equal Variance test (Levene), centred on the mean
_res = stats.levene(vp_df1, vp_df2, center='mean')
_levene_table = pd.DataFrame(
    data={'Statistic': _res.statistic, 'p-value': _res.pvalue},
    index=['Equal Variance test (Levene)'],
)
display(Markdown('### Equal Variance test (Levene)'))
display(_levene_table)

# Statistics: descriptive summary of both samples
_statistics_table = pd.DataFrame(
    data={
        'N': [s.size for s in _samples],
        'Mean': [s.mean() for s in _samples],
        'Std. Deviation': [s.std() for s in _samples],
        # Standard error of the mean: sample standard deviation / sqrt(N).
        'Std. Error mean': [s.std() / np.sqrt(s.size) for s in _samples],
    },
    index=[['Statistics'] * 2, _variable_labels],
)
display(Markdown('### Statistics'))
display(_statistics_table)

# Independent two-sample t-test
# _res1 assumes equal variances; _res2 does not (Welch's t-test).
_res1 = stats.ttest_ind(vp_df1, vp_df2, equal_var=True, alternative='greater')
_res2 = stats.ttest_ind(vp_df1, vp_df2, equal_var=False, alternative='greater')
_mean_difference = vp_df1.mean() - vp_df2.mean()
_ttest_table = pd.DataFrame(
    data={
        'Statistic': [_res1.statistic, _res2.statistic],
        'Alternative': ['greater'] * 2,
        'p-value': [_res1.pvalue, _res2.pvalue],
        'Mean difference': [_mean_difference] * 2,
    },
    index=[
        ['Independent two-sample t-test'] * 2,
        ['Equal variance'] * 2,
        [True, False],
    ],
)
display(Markdown('### Independent two-sample t-test'))
display(_ttest_table)
display(Markdown("If equal_var is False, perform Welch's t-test, which does not assume equal population variance"))

### Normality test (Shapiro-Wilk)

Unnamed: 0,Unnamed: 1,Statistic,p-value
Normality test (Shapiro-Wilk),Variable1,0.849068,0.093219
Normality test (Shapiro-Wilk),Variable2,0.875604,0.170872


### Equal Variance test (Levene)

Unnamed: 0,Statistic,p-value
Equal Variance test (Levene),6.097829,0.027017


### Statistics

Unnamed: 0,Unnamed: 1,N,Mean,Std. Deviation,Std. Error mean
Statistics,Variable1,8,2693.0,2771.629845,979.919129
Statistics,Variable2,8,574.5,349.147942,123.442439


### Independent two-sample t-test

Unnamed: 0,Unnamed: 1,Unnamed: 2,Statistic,Alternative,p-value,Mean difference
Independent two-sample t-test,Equal variance,True,2.144961,greater,0.024992,2118.5
Independent two-sample t-test,Equal variance,False,2.144961,greater,0.033965,2118.5


If equal_var is False, perform Welch's t-test, which does not assume equal population variance

# Q2. In the UX field, the evaluation stage registers the highest level of AI utilization, whereas it is significantly low in fashion design. The field of graphic design also shows notably low figures.
## z-test

In [8]:
from scipy.stats import norm

# One-sample z-test: is UX's Evaluation figure (2667) above the mean of the
# Evaluation column?
evaluation = results['Evaluation']
mean = 2667  # the single observation under test (not a mean, despite the name)
std_dev = evaluation.std()
sample_size = evaluation.size
std_error = std_dev / sample_size**0.5

z_value = (mean - evaluation.mean()) / std_error
p_value = 1 - norm.cdf(z_value)  # upper-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.697916815656145
P value: 0.0034887436815828066


In [9]:
# One-sample z-test: is Fashion's Evaluation figure (0) below the mean of the
# Evaluation column?
evaluation = results['Evaluation']
mean = 0  # the single observation under test (not a mean, despite the name)
std_dev = evaluation.std()
sample_size = evaluation.size
std_error = std_dev / sample_size**0.5

z_value = (mean - evaluation.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.7546476319644855
P value: 0.039659799339619314


In [10]:
# One-sample z-test: is Graphic's Evaluation figure (305) below the mean of the
# Evaluation column?
evaluation = results['Evaluation']
mean = 305  # the single observation under test (not a mean, despite the name)
std_dev = evaluation.std()
sample_size = evaluation.size
std_error = std_dev / sample_size**0.5

z_value = (mean - evaluation.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.2454492230689878
P value: 0.10648333463411075


# Q3.

#### 1. In the field of graphic design, AI is mainly discussed in the ideation phase compared to other areas.

#### 2. Mock-up processes are mentioned much less than in other areas.

#### 3. In the field of industrial design, AI is mentioned fairly evenly across almost all stages. In particular, the mock-up phase sees relatively more active discussions about AI except in the UX field where the mock-up stage is most significant.

#### 4. The research phase shows the lowest figures in the field of industrial design.

#### 5. On the other hand, the ideation stage is the least utilized because in the UX field, it is more important to develop modules that are useful, usable, findable, and accessible, rather than developing new modules based solely on creative ideas (Morville 2005).

#### 6, 7. Lastly, in the field of fashion, high figures are observed in both the research and production stages.

#### 8, 9. The mock-up and evaluation stages, however, are scarcely mentioned. This shows the technical limitations of AI, as the fashion design field involves the physical creation and evaluation of samples using fabrics, materials, and textiles (Evangelista 2020).

## One-sample z-test

In [5]:
# Display the `results` table (design fields as rows, stages as columns).
results

Unnamed: 0,Research,Ideation,Mock-up,Production,Evaluation
Graphic,560,738,25,3232,305
Industry,271,451,120,3697,1232
UX,544,163,245,1687,2667
Fashion,1315,554,35,8724,0


In [30]:
# Flip the table so each design field becomes a column and each stage a row,
# then show it as the cell output.
results_2 = results.T
results_2

Unnamed: 0,Graphic,Industry,UX,Fashion
Research,560,271,544,1315
Ideation,738,451,163,554
Mock-up,25,120,245,35
Production,3232,3697,1687,8724
Evaluation,305,1232,2667,0


In [11]:
# One-sample z-test: is Graphic's Ideation figure (738) above the mean of the
# Ideation column?
ideation = results['Ideation']
mean = 738  # the single observation under test (not a mean, despite the name)
std_dev = ideation.std()
sample_size = ideation.size
std_error = std_dev / sample_size**0.5

z_value = (mean - ideation.mean()) / std_error
p_value = 1 - norm.cdf(z_value)  # upper-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.175888932503716
P value: 0.014781783285069738


In [32]:
# One-sample z-test: is Graphic's Mock-up figure (25) below the mean of
# Graphic's figures across all stages?
graphic = results_2['Graphic']
mean = 25  # the single observation under test (not a mean, despite the name)
std_dev = graphic.std()
sample_size = graphic.size
std_error = std_dev / sample_size**0.5

z_value = (mean - graphic.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.6393755291242338
P value: 0.05056753740115679


In [13]:
# One-sample z-test: is UX's Mock-up figure (245) above the mean of the
# Mock-up column?
mockup = results['Mock-up']
mean = 245  # the single observation under test (not a mean, despite the name)
std_dev = mockup.std()
sample_size = mockup.size
std_error = std_dev / sample_size**0.5

z_value = (mean - mockup.mean()) / std_error
p_value = 1 - norm.cdf(z_value)  # upper-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.724661424007945
P value: 0.0032183728238290854


In [14]:
# One-sample z-test: is Industry's Research figure (271) below the mean of the
# Research column?
research = results['Research']
mean = 271  # the single observation under test (not a mean, despite the name)
std_dev = research.std()
sample_size = research.size
std_error = std_dev / sample_size**0.5

z_value = (mean - research.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.79082762940694
P value: 0.03666048001690816


In [33]:
# One-sample z-test: is UX's Evaluation figure (2667) above the mean of the
# Evaluation column? (Same computation as the Q2 cell for this value.)
evaluation = results['Evaluation']
mean = 2667  # the single observation under test (not a mean, despite the name)
std_dev = evaluation.std()
sample_size = evaluation.size
std_error = std_dev / sample_size**0.5

z_value = (mean - evaluation.mean()) / std_error
p_value = 1 - norm.cdf(z_value)  # upper-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.697916815656145
P value: 0.0034887436815828066


In [27]:
# One-sample z-test: is UX's Ideation figure (163) below the mean of UX's
# figures across all stages?
ux = results_2['UX']
mean = 163  # the single observation under test (not a mean, despite the name)
std_dev = ux.std()
sample_size = ux.size
std_error = std_dev / sample_size**0.5

z_value = (mean - ux.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.8507473319490144
P value: 0.03210295563042785


In [19]:
# One-sample z-test: is Fashion's Research figure (1315) above the mean of
# Fashion's figures across all stages?
fashion = results_2['Fashion']
mean = 1315  # the single observation under test (not a mean, despite the name)
std_dev = fashion.std()
sample_size = fashion.size
std_error = std_dev / sample_size**0.5

z_value = (mean - fashion.mean()) / std_error
p_value = 1 - norm.cdf(z_value)  # upper-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -0.48636389734679736
P value: 0.6866454085705993


In [18]:
# One-sample z-test: is Fashion's Production figure (8724) above the mean of
# Fashion's figures across all stages?
fashion = results_2['Fashion']
mean = 8724  # the single observation under test (not a mean, despite the name)
std_dev = fashion.std()
sample_size = fashion.size
std_error = std_dev / sample_size**0.5

z_value = (mean - fashion.mean()) / std_error
p_value = 1 - norm.cdf(z_value)  # upper-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: 3.959071724960656
P value: 3.762081988711863e-05


In [34]:
# One-sample z-test: is Fashion's Mock-up figure (35) below the mean of
# Fashion's figures across all stages?
fashion = results_2['Fashion']
mean = 35  # the single observation under test (not a mean, despite the name)
std_dev = fashion.std()
sample_size = fashion.size
std_error = std_dev / sample_size**0.5

z_value = (mean - fashion.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.2543700515583698
P value: 0.10485376638360033


In [35]:
# One-sample z-test: is Fashion's Evaluation figure (0) below the mean of
# Fashion's figures across all stages?
fashion = results_2['Fashion']
mean = 0  # the single observation under test (not a mean, despite the name)
std_dev = fashion.std()
sample_size = fashion.size
std_error = std_dev / sample_size**0.5

z_value = (mean - fashion.mean()) / std_error
p_value = norm.cdf(z_value)  # lower-tail probability

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.2753702198375925
P value: 0.10108911707985896


In [28]:
# Visual Python: Data Analysis > Snippets
#@title Convert ipynb to HTML in Colab
# Uploads a notebook through the Colab file picker, converts it to HTML with
# nbconvert, and offers the resulting HTML file for download.
import os
import subprocess
import sys

from google.colab import files

# Upload ipynb
f = files.upload()
if not f:
    # Guard against an empty upload result so the failure is explicit
    # rather than an IndexError on the lookup below.
    raise ValueError("No notebook was uploaded; nothing to convert.")
file0 = list(f.keys())[0]

# Convert ipynb to html
# pip is invoked through the running interpreter so nbconvert is installed into
# the same environment as this kernel. check=True makes a failed install or
# conversion raise here instead of surfacing later as a missing download file.
_ = subprocess.run([sys.executable, "-m", "pip", "install", "nbconvert"], check=True)
_ = subprocess.run(["jupyter", "nbconvert", file0, "--to", "html"], check=True)

# download the html
# Swap the notebook's extension for ".html" (for "name.ipynb" this is the same
# "name.html" the fixed-width slice used to produce).
files.download(os.path.splitext(file0)[0] + ".html")


Saving Data Analysis_Revised_final_statistical test.ipynb to Data Analysis_Revised_final_statistical test.ipynb


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>