In [None]:
# Visual Python: Data Analysis > Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from IPython.display import Markdown, display
from scipy import stats
from scipy.stats import norm

In [None]:
# Row labels for the count table: one entry per design field.
index = [
    'Graphic',
    'Industry',
    'UX',
    'Fashion',
]

In [None]:
# Count table: one row per design field (labelled by `index`), one column per design stage.
# Each list follows the order of `index`.
results = pd.DataFrame(
    data={
        'Research': [611, 271, 490, 1309],
        'Ideation': [738, 451, 163, 551],
        'Mock-up': [25, 120, 245, 34],
        'Production': [3232, 3697, 1687, 8678],
        'Evaluation': [305, 1232, 2667, 0],
    },
    index=index,
)

In [None]:
# Show the field-by-stage count table as this cell's rich output.
results

Unnamed: 0,Research,Ideation,Mock-up,Production,Evaluation
Graphic,611,738,25,3232,305
Industry,271,451,120,3697,1232
UX,490,163,245,1687,2667
Fashion,1309,551,34,8678,0


# Q1. AI is mentioned predominantly in the later stages of the design process, with relatively few mentions in the initial stages.
## Independent two-sample t-test
### Hypothesis: Group1(Production + Evaluation) > Group2(Research + Ideation)
### Results: p-value = 0.034 (one-sided Welch's t-test, equal variances not assumed)

In [None]:
# Q1 samples, derived from `results` rather than retyped, so they cannot drift from the table.
# Group_1 stacks the later stages (Production, then Evaluation);
# Group_2 stacks the early stages (Research, then Ideation).
# ignore_index=True gives both columns the same fresh 0..7 index so they align row by row.
q1 = pd.DataFrame({
    'Group_1': pd.concat([results['Production'], results['Evaluation']], ignore_index=True),
    'Group_2': pd.concat([results['Research'], results['Ideation']], ignore_index=True),
})

In [None]:
# Visual Python: Student's t-test
# Independent two-sample t-test, one-sided: is the mean of Group_1 greater than the mean of Group_2?
# `display`, `Markdown` and `stats` come from the notebook's import cell.
vp_df1 = q1['Group_1'].dropna().copy()
vp_df2 = q1['Group_2'].dropna().copy()

# Normality test (Shapiro-Wilk), run separately on each group
_res1 = stats.shapiro(vp_df1)
_res2 = stats.shapiro(vp_df2)
display(Markdown('### Normality test (Shapiro-Wilk)'))
display(pd.DataFrame(data={'Statistic': [_res1.statistic, _res2.statistic],
                           'p-value': [_res1.pvalue, _res2.pvalue]},
                     index=[['Normality test (Shapiro-Wilk)'] * 2, ['Variable1', 'Variable2']]))

# Equal Variance test (Levene), centred on the group means
display(Markdown('### Equal Variance test (Levene)'))
_res = stats.levene(vp_df1, vp_df2, center='mean')
display(pd.DataFrame(data={'Statistic': _res.statistic, 'p-value': _res.pvalue},
                     index=['Equal Variance test (Levene)']))

# Statistics: descriptive summary of each group
display(Markdown('### Statistics'))
display(pd.DataFrame(data={'N': [vp_df1.size, vp_df2.size],
                           'Mean': [vp_df1.mean(), vp_df2.mean()],
                           'Std. Deviation': [vp_df1.std(), vp_df2.std()],
                           'Std. Error mean': [vp_df1.std() / np.sqrt(vp_df1.size),
                                               vp_df2.std() / np.sqrt(vp_df2.size)]},
                     index=[['Statistics'] * 2, ['Variable1', 'Variable2']]))

# Independent two-sample t-test, reported both with pooled variance (equal_var=True)
# and as Welch's test (equal_var=False).
# NOTE(review): the stored Levene output (p ~ 0.027) rejects equal variances at the 5% level,
# so the Welch row is the one matching the p-value quoted in the Q1 heading.
_res1 = stats.ttest_ind(vp_df1, vp_df2, equal_var=True, alternative='greater')
_res2 = stats.ttest_ind(vp_df1, vp_df2, equal_var=False, alternative='greater')
display(Markdown('### Independent two-sample t-test'))
display(pd.DataFrame(data={'Statistic': [_res1.statistic, _res2.statistic],
                           'Alternative': ['greater'] * 2,
                           'p-value': [_res1.pvalue, _res2.pvalue],
                           'Mean difference': [vp_df1.mean() - vp_df2.mean()] * 2},
                     index=[['Independent two-sample t-test'] * 2, ['Equal variance'] * 2, [True, False]]))
display(Markdown("If equal_var is False, perform Welch's t-test, which does not assume equal population variance"))

### Normality test (Shapiro-Wilk)

Unnamed: 0,Unnamed: 1,Statistic,p-value
Normality test (Shapiro-Wilk),Variable1,0.850837,0.097147
Normality test (Shapiro-Wilk),Variable2,0.893727,0.253366


### Equal Variance test (Levene)

Unnamed: 0,Statistic,p-value
Equal Variance test (Levene),6.069607,0.027316


### Statistics

Unnamed: 0,Unnamed: 1,N,Mean,Std. Deviation,Std. Error mean
Statistics,Variable1,8,2687.25,2757.34146,974.867422
Statistics,Variable2,8,573.0,348.860594,123.340846


### Independent two-sample t-test

Unnamed: 0,Unnamed: 1,Unnamed: 2,Statistic,Alternative,p-value,Mean difference
Independent two-sample t-test,Equal variance,True,2.151604,greater,0.024685,2114.25
Independent two-sample t-test,Equal variance,False,2.151604,greater,0.033624,2114.25


If equal_var is False, perform Welch's t-test, which does not assume equal population variance

# Q2. In the UX field, the evaluation stage registers the highest level of AI utilization, whereas in fashion design it is significantly low.
## One-sample z-test

In [85]:
# One-sample z-test, upper tail: UX's Evaluation count against the Evaluation column.
# `norm` (scipy.stats) comes from the notebook's import cell.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['UX', 'Evaluation']  # tested value (2667), read from the table instead of retyped
std_dev = results['Evaluation'].std()
sample_size = results['Evaluation'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Evaluation'].mean()) / (std_dev / sample_size**0.5)
p_value = 1 - norm.cdf(z_value)  # upper-tail probability P(Z >= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.697916815656145
P value: 0.0034887436815828066


In [86]:
# One-sample z-test, lower tail: Fashion's Evaluation count against the Evaluation column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['Fashion', 'Evaluation']  # tested value (0), read from the table instead of retyped
std_dev = results['Evaluation'].std()
sample_size = results['Evaluation'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Evaluation'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.7546476319644855
P value: 0.039659799339619314


In [87]:
# One-sample z-test, lower tail: Graphic's Evaluation count against the Evaluation column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['Graphic', 'Evaluation']  # tested value (305), read from the table instead of retyped
std_dev = results['Evaluation'].std()
sample_size = results['Evaluation'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Evaluation'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.2454492230689878
P value: 0.10648333463411075


# Q3. In the UX field, the evaluation stage registers the highest level of AI utilization, whereas in contrast, it appears significantly low in the realm of fashion design.

#### 1. In the field of graphic design, AI is mainly discussed in the ideation phase compared to other areas.

#### 2. Mock-up processes are mentioned much less than in other areas.

#### 3. In the field of industrial design, AI is mentioned fairly evenly across almost all stages. In particular, the mock-up phase sees relatively more active discussions about AI except in the UX field where the mock-up stage is most significant.

#### 4. The research phase shows the lowest figures in the field of industrial design.

#### 5. In the UX field, as earlier mentioned, artificial intelligence is overwhelmingly cited in the mock-up and evaluation areas.

#### 6. In the UX field, as earlier mentioned, artificial intelligence is overwhelmingly cited in the mock-up and evaluation areas.

## One-sample z-test

In [88]:
# One-sample z-test, upper tail: Graphic's Ideation count against the Ideation column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['Graphic', 'Ideation']  # tested value (738), read from the table instead of retyped
std_dev = results['Ideation'].std()
sample_size = results['Ideation'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Ideation'].mean()) / (std_dev / sample_size**0.5)
p_value = 1 - norm.cdf(z_value)  # upper-tail probability P(Z >= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.185019972343432
P value: 0.014443691112035584


In [89]:
# One-sample z-test, lower tail: Graphic's Mock-up count against the Mock-up column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['Graphic', 'Mock-up']  # tested value (25), read from the table instead of retyped
std_dev = results['Mock-up'].std()
sample_size = results['Mock-up'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Mock-up'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.5869646853492305
P value: 0.056260321546752506


In [90]:
# One-sample z-test, upper tail: UX's Mock-up count against the Mock-up column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['UX', 'Mock-up']  # tested value (245), read from the table instead of retyped
std_dev = results['Mock-up'].std()
sample_size = results['Mock-up'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Mock-up'].mean()) / (std_dev / sample_size**0.5)
p_value = 1-norm.cdf(z_value)  # upper-tail probability P(Z >= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: 2.7233097686857164
P value: 0.0032315712616525216


In [91]:
# One-sample z-test, lower tail: Industry's Research count against the Research column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['Industry', 'Research']  # tested value (271), read from the table instead of retyped
std_dev = results['Research'].std()
sample_size = results['Research'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Research'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.7804587047216034
P value: 0.03750046088192855


In [92]:
# One-sample z-test, lower tail: UX's Ideation count against the Ideation column.
# NOTE(review): n = 4 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results.loc['UX', 'Ideation']  # tested value (163), read from the table instead of retyped
std_dev = results['Ideation'].std()
sample_size = results['Ideation'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results['Ideation'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -2.605776916493454
P value: 0.004583307128540436


In [93]:
# Transposed view of the count table: stages become rows and fields become columns,
# so a single field (e.g. 'Fashion') can be selected as one column.
results_2 = results.T
results_2

Unnamed: 0,Graphic,Industry,UX,Fashion
Research,611,271,490,1309
Ideation,738,451,163,551
Mock-up,25,120,245,34
Production,3232,3697,1687,8678
Evaluation,305,1232,2667,0


In [96]:
# One-sample z-test, upper tail: Fashion's Production count against the Fashion column
# of the transposed table (one value per stage).
# NOTE(review): n = 5 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results_2.loc['Production', 'Fashion']  # tested value (8678), read from the table instead of retyped
std_dev = results_2['Fashion'].std()
sample_size = results_2['Fashion'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results_2['Fashion'].mean()) / (std_dev / sample_size**0.5)
p_value = 1 - norm.cdf(z_value)  # upper-tail probability P(Z >= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: 3.9589901266196326
P value: 3.7633673986015026e-05


In [97]:
# One-sample z-test, upper tail: Fashion's Research count against the Fashion column
# of the transposed table (one value per stage).
# NOTE(review): n = 5 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
# NOTE(review): the stored output has z < 0 (1309 is below the Fashion mean), so the upper
# tail gives p ~ 0.69 -- confirm the upper tail is the intended direction here.
mean = results_2.loc['Research', 'Fashion']  # tested value (1309), read from the table instead of retyped
std_dev = results_2['Fashion'].std()
sample_size = results_2['Fashion'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results_2['Fashion'].mean()) / (std_dev / sample_size**0.5)
p_value = 1 - norm.cdf(z_value)  # upper-tail probability P(Z >= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -0.4857960034096307
P value: 0.6864440959012339


In [99]:
# One-sample z-test, lower tail: Fashion's Mock-up count against the Fashion column
# of the transposed table (one value per stage).
# NOTE(review): n = 5 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results_2.loc['Mock-up', 'Fashion']  # tested value (34), read from the table instead of retyped
std_dev = results_2['Fashion'].std()
sample_size = results_2['Fashion'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results_2['Fashion'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.2548423211986537
P value: 0.10476800290788146


In [100]:
# One-sample z-test, lower tail: Fashion's Evaluation count against the Fashion column
# of the transposed table (one value per stage).
# NOTE(review): n = 5 with a standard deviation estimated from the same column that contains
# the tested value -- confirm a z-test (rather than a t-test) is intended.
mean = results_2.loc['Evaluation', 'Fashion']  # tested value (0), read from the table instead of retyped
std_dev = results_2['Fashion'].std()
sample_size = results_2['Fashion'].size

# z = (tested value - column mean) / standard error of the column mean
z_value = (mean - results_2['Fashion'].mean()) / (std_dev / sample_size**0.5)
p_value = norm.cdf(z_value)  # lower-tail probability P(Z <= z)

print("Z value:", z_value)
print("P value:", p_value)

Z value: -1.275350223006361
P value: 0.10109265437886117
