<a href="https://colab.research.google.com/github/BonnieChester/GSM-KU-Workshop/blob/main/Similarity_for_WS_(six_facets).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers
!pip install pandas
!pip install gdown



In [None]:
import os
import pandas as pd
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Mount Google Drive at /content/drive so the dataset and output paths under
# /content/drive/MyDrive used later in this notebook can be read and written.
# Only works inside Google Colab (google.colab is not available elsewhere).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import nltk
from nltk.corpus import stopwords
# Define paths
data_path = '/content/drive/MyDrive/Colab Notebooks/part4.csv'  # Update with your dataset path

# Reference descriptions of the six playability facets. Each review is compared
# against the BERT embedding of each of these texts.
intrapersonal_text = 'This refers to the individual outlook, perceptions and feelings that the video game produces in each player when they play, and as such has a high subjective value. '
artistic_text = 'This facet relates to the quality of the artistic and aesthetic rendering in the game elements visual graphics, sound effects, music and melodies, storyline and story telling and how these elements are executed in the video game. '
interactive_text = 'This is associated with player interaction and video game user interface development, for example, interaction dialogue and game controls. This aspect of Playability is strongly connected to the Game Interface.'
# The possessive below was previously mojibake ("gameâ€™s": a UTF-8 right single
# quote decoded with the wrong codec); it is now a plain ASCII apostrophe.
intrinsic_text = "This is the Playability inherent in the nature of the video game itself and how it is presented to the player. It is closely related to Gameplay design and Game Mechanic. In this facet, for example, we can analyze the game's rules, goals, objectives, rhythm and other mechanics. "
interpersonal_text = 'This refers to the feelings and perceptions of users, and the group awareness that arise when a game is played in company, be it in a competitive, cooperative or collaborative way.'
mechanical_text = 'This is related to the quality of the video game as a software system. It is associated with the Game Engine, with particular emphasis on, for example, the fluency of the film scenes, correct lighting, sound, music, animated graphics and characterization, as well as communication systems in a multiplayer video game.'

output_path = '/content/drive/MyDrive/WS/similarity4.csv'  # Update with your output path


# Load data with specified encoding
# NOTE(review): latin1 decodes any byte sequence without error, but it will
# garble non-ASCII characters if the file is really UTF-8 -- confirm the
# dataset's actual encoding.
data = pd.read_csv(data_path, usecols=['app_name', 'review_text'], header=0, encoding='latin1')

# Rename columns by label rather than by position: read_csv ignores the order
# of `usecols` and returns columns in file order, so a positional assignment
# would swap the two columns if review_text preceded app_name in the CSV.
data = data.rename(columns={'app_name': 'GameName', 'review_text': 'Text'})[['GameName', 'Text']]

# Load stopwords and tokenizer resources
nltk.download('stopwords')
nltk.download("wordnet")
nltk.download("punkt")
# Newer NLTK releases load the Punkt model used by word_tokenize from the
# 'punkt_tab' resource; download it as well so tokenization works there too.
nltk.download("punkt_tab")
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
def preprocess_text(text):
    """Normalize a piece of text before it is embedded.

    The text is lower-cased, tokenized with NLTK, and filtered so that only
    purely alphabetic tokens that are not English stopwords remain.

    Args:
        text: The raw text. Missing values (``None`` or float NaN, which is
            how pandas represents an empty CSV cell) are treated as empty
            text; any other non-string value is converted with ``str``.

    Returns:
        The remaining tokens joined by single spaces ('' for missing input).
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ''
        text = str(text)

    # Convert to lowercase
    text = text.lower()
    # Tokenize and remove stopwords
    tokens = nltk.word_tokenize(text)
    tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
    return ' '.join(tokens)

In [None]:
# Load the pretrained BERT tokenizer and encoder from the same checkpoint
_bert_checkpoint = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_bert_checkpoint)
model = BertModel.from_pretrained(_bert_checkpoint)

# Embed a single text (a facet description or a game review) with BERT
def get_embeddings(text):
    """Return the BERT pooled-output embedding of ``text`` as a NumPy array.

    The text is first cleaned with ``preprocess_text``, then tokenized
    (truncated to at most 512 tokens) and passed through the module-level
    ``model``.

    Args:
        text: The raw text to embed.

    Returns:
        The model's ``pooler_output`` converted to a NumPy array
        (presumably shape (1, hidden_size) for a single input -- the callers
        index the resulting similarity matrix with ``[0][0]``).
    """
    # Local import keeps this cell self-contained; torch is already a
    # dependency of the transformers model used here.
    import torch

    text = preprocess_text(text)
    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
    # Inference only: disable autograd so no computation graph is built and
    # kept alive for every review that gets embedded.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs['pooler_output'].detach().numpy()

# Cosine similarity between two sets of embeddings
def calculate_similarity(embeddings1, embeddings2):
    """Return the pairwise cosine-similarity matrix of the two inputs.

    Thin wrapper around ``sklearn.metrics.pairwise.cosine_similarity``;
    both arguments are forwarded unchanged, in the same order.
    """
    return cosine_similarity(embeddings1, embeddings2)

# Embed each of the six facet descriptions once, up front, so the per-review
# loop only has to embed the review itself.
(
    intrapersonal_embeddings,
    artistic_embeddings,
    interactive_embeddings,
    intrinsic_embeddings,
    interpersonal_embeddings,
    mechanical_embeddings,
) = [
    get_embeddings(facet_text)
    for facet_text in (
        intrapersonal_text,
        artistic_text,
        interactive_text,
        intrinsic_text,
        interpersonal_text,
        mechanical_text,
    )
]

In [None]:
# Score every review against each facet.
# The tuple order fixes the order of the similarity values in each result row:
# intrapersonal, artistic, interactive, intrinsic, interpersonal, mechanical.
facet_embeddings = (
    intrapersonal_embeddings,
    artistic_embeddings,
    interactive_embeddings,
    intrinsic_embeddings,
    interpersonal_embeddings,
    mechanical_embeddings,
)

results = []
for game_name, game_text in zip(data['GameName'], data['Text']):
    game_embeddings = get_embeddings(game_text)
    # Each call yields a similarity matrix; [0][0] picks the single score
    # between the facet embedding and this review's embedding.
    facet_scores = [
        calculate_similarity(facet, game_embeddings)[0][0]
        for facet in facet_embeddings
    ]
    results.append([game_name, *facet_scores])

In [None]:
# Create DataFrame and save results
result_df = pd.DataFrame(results, columns=['GameName', 'IntrapersonalSimilarity', 'ArtisticSimilarity', 'InteractiveSimilarity', 'IntrinsicSimilarity', 'InterpersonalSimilarity', 'MechanicalSimilarity'])

# Make sure the destination folder exists: to_csv does not create missing
# directories, and failing here would discard the whole embedding run.
output_dir = os.path.dirname(output_path)
if output_dir:  # empty when output_path is a bare file name
    os.makedirs(output_dir, exist_ok=True)
result_df.to_csv(output_path, index=False)

print("Similarity calculation and results saved successfully.")

Similarity calculation and results saved successfully.
