In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the dataset from data.csv (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='data.csv')

In [3]:
# Show the loaded DataFrame as this cell's output.
data

Unnamed: 0,UserID,Age,Gender,VRHeadset,Duration,MotionSickness,ImmersionLevel
0,1,40,Male,HTC Vive,13.598508,8,5
1,2,43,Female,HTC Vive,19.950815,2,2
2,3,27,Male,PlayStation VR,16.543387,4,2
3,4,33,Male,HTC Vive,42.574083,6,3
4,5,51,Male,PlayStation VR,22.452647,4,2
...,...,...,...,...,...,...,...
995,996,30,Male,HTC Vive,14.656763,9,2
996,997,57,Other,Oculus Rift,25.172419,7,5
997,998,35,Other,PlayStation VR,37.576139,7,2
998,999,52,Male,PlayStation VR,27.567473,7,2


In [4]:
from sklearn.cluster import KMeans
from mlxtend.frequent_patterns import apriori, association_rules

# 1) Demographic Analysis
# a) Mean MotionSickness per Gender: which group reports the most motion sickness?
motion_sickness_by_demographic = data['MotionSickness'].groupby(data['Gender']).mean()

# b) Mean ImmersionLevel per Gender: which group reports the highest immersion?
immersion_level_by_demographic = data['ImmersionLevel'].groupby(data['Gender']).mean()

# 2) VR Headset Performance
# Mean MotionSickness and mean Duration for each VRHeadset value.
vr_performance = data.groupby('VRHeadset')[['MotionSickness', 'Duration']].mean()

# 3) Cluster Analysis
# Cluster on the numeric columns only.
features = data[['Age', 'Duration', 'MotionSickness', 'ImmersionLevel']]
# Z-score every column (pandas' default sample std, ddof=1) so that no single
# feature dominates the Euclidean distances KMeans minimises.
features_normalized = (features - features.mean()) / features.std()

# Perform KMeans clustering.
# random_state pins the centroid initialisation so the cluster labels are the
# same on every Restart & Run All; n_init is set explicitly because its default
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)  # adjust n_clusters as needed
# Store each row's cluster label alongside the original columns.
data['Cluster'] = kmeans.fit_predict(features_normalized)

# 4) Association Rule Mining
# One-hot encode the categorical columns. dtype=bool is requested explicitly:
# older pandas releases default to uint8 indicator columns, and mlxtend's
# apriori warns when it is given a non-boolean DataFrame.
data_encoded = pd.get_dummies(data[['Gender', 'VRHeadset']], dtype=bool)
# Keep itemsets that occur in at least 10% of the rows, then derive the rules
# whose lift is at least 1.
frequent_items = apriori(data_encoded, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1)

# Report every analysis in order. sep="\n" places each argument on its own
# line, so the emitted text matches one print() call per item.
print("1) Demographic Analysis:", "a) Motion Sickness by Gender:",
      motion_sickness_by_demographic, sep="\n")
print("\nb) Immersion Level by Gender:", immersion_level_by_demographic, sep="\n")

print("\n2) VR Headset Performance:", vr_performance, sep="\n")

print("\n3) Cluster Analysis:", data[['UserID', 'Cluster']], sep="\n")

print("\n4) Association Rules:", rules, sep="\n")


1) Demographic Analysis:
a) Motion Sickness by Gender:
Gender
Female    5.464615
Male      5.583587
Other     5.528902
Name: MotionSickness, dtype: float64

b) Immersion Level by Gender:
Gender
Female    2.969231
Male      3.000000
Other     2.959538
Name: ImmersionLevel, dtype: float64

2) VR Headset Performance:
                MotionSickness   Duration
VRHeadset                                
HTC Vive              5.482036  33.773805
Oculus Rift           5.379009  33.112082
PlayStation VR        5.727554  30.770171

3) Cluster Analysis:
     UserID  Cluster
0         1        1
1         2        2
2         3        2
3         4        0
4         5        2
..      ...      ...
995     996        2
996     997        1
997     998        0
998     999        2
999    1000        0

[1000 rows x 2 columns]

4) Association Rules:
                  antecedents                 consequents  antecedent support  \
0  (VRHeadset_PlayStation VR)             (Gender_Female)              