## Importing Libraries

In [31]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

## Defining the dataframe

In [50]:
# Load the raw game log. Later cells read the columns 'game_id', 'p1_id',
# 'p2_id', 'p1_action' and 'p2_action' from this frame.
# NOTE: the name `df` is reassigned by later cells (training and clustering).
df = pd.read_csv('input_game.csv')

## Dividing the database with respect to players

We want to model the strategy of each player, so we create one CSV file per player (named `<player_id>.csv`) containing every turn in which that player takes part, across all games. These CSV files are used later to train one model per player.

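Each row of these files describes a window of five consecutive turns: the features (x1–x8) are the actions of the player and of the opponent over the first four turns, and the target (y) is the player's own action on the fifth turn. Actions are encoded as CHEAT = -1 and TRUST = 1. This representation is used when training and testing the neural network.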

In [None]:
# Numeric encoding of the two actions. Any other value is passed through unchanged.
ACTION_CODES = {'CHEAT': -1, 'TRUST': 1}

# Number of past turns used as features for predicting the following turn.
HISTORY_LEN = 4

# Column layout of the per-player CSV files. X1..X4 are never filled in (they are
# written as empty fields) but are kept so the on-disk layout does not change.
HISTORY_COLUMNS = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']
OUTPUT_COLUMNS = HISTORY_COLUMNS + ['X1', 'X2', 'X3', 'X4', 'y']


def _encode_actions(actions):
    """Return the actions as a list with 'CHEAT'/'TRUST' replaced by -1/1."""
    return [ACTION_CODES.get(action, action) for action in actions]


def _sliding_window_rows(own, opponent, history_len=HISTORY_LEN):
    """Build one training row per window of ``history_len + 1`` consecutive turns.

    Each row is ``[own[t], opponent[t], ...]`` for the ``history_len`` turns of
    the window (own action first, then the opponent's), followed by the own
    action on the turn right after them (the prediction target).
    Returns an empty list when there are not enough turns for a single window.
    """
    rows = []
    for target_turn in range(history_len, len(own)):
        row = []
        for turn in range(target_turn - history_len, target_turn):
            row += [own[turn], opponent[turn]]
        row.append(own[target_turn])
        rows.append(row)
    return rows


def _to_player_frame(rows):
    """Assemble the rows into a frame with the per-player CSV column layout."""
    # Build the frame once instead of concatenating row by row (which is quadratic).
    frame = pd.DataFrame(rows, columns=HISTORY_COLUMNS + ['y']).reindex(columns=OUTPUT_COLUMNS)
    # Every row carries the index label 0, exactly as in the files written so far;
    # the training cell skips this first CSV column.
    frame.index = [0] * len(frame)
    return frame


# Group the data by 'game_id' column
grouped_data = df.groupby('game_id')

# Iterate over the games
for game_id, game_df in grouped_data:
    print('Game ID:', game_id)

    player1_id, player2_id = game_df['p1_id'].iloc[0], game_df['p2_id'].iloc[0]

    # Encode the actions once per game rather than once per window.
    p1_actions = _encode_actions(game_df['p1_action'])
    p2_actions = _encode_actions(game_df['p2_action'])

    # Same windows seen from each player's side: own action first, opponent second.
    player1_actions_df = _to_player_frame(_sliding_window_rows(p1_actions, p2_actions))
    player2_actions_df = _to_player_frame(_sliding_window_rows(p2_actions, p1_actions))

    # Write dataframes to CSV files, append if files already exist.
    # NOTE: because of mode='a', re-running this cell appends the same rows again;
    # delete the generated <player_id>.csv files before a re-run.
    player1_actions_df.to_csv(str(player1_id) + '.csv', mode='a', header=False)
    player2_actions_df.to_csv(str(player2_id) + '.csv', mode='a', header=False)


## Defining and training the neural network

In [58]:
class Net(nn.Module):
    """Single-layer logistic model: 8 input features -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        # One linear unit over the 8 features; its weights and bias are the
        # 9 parameters exposed through state_dict().
        self.fc1 = nn.Linear(8, 1)

    def forward(self, x):
        """Return sigmoid(fc1(x)), a value in (0, 1) for each input row."""
        logits = self.fc1(x)
        return torch.sigmoid(logits)

In [None]:
# Train one logistic model per player file and print its 9 learned parameters
# (8 weights followed by the bias) as one comma-terminated line per player.

# Hyperparameters are identical for every player, so they are set once.
learning_rate = 0.01
epochs = 2000

# Fix the seed so the random weight initialisation (and hence the printed
# parameters) is reproducible across runs.
torch.manual_seed(0)

# Iterate over a range of file indices
for file_index in range(101, 202):
    # Read data from a CSV file.
    # The per-player files are written without a header row, so header=None is
    # required; otherwise the first training sample is consumed as column names.
    filename = f'{file_index}.csv'
    df = pd.read_csv(filename, header=None)
    data_array = df.to_numpy()

    # Column 0 is the saved index, columns 1..8 are the features x1..x8 and the
    # last column is the target y encoded as -1/1.
    features = data_array[:, 1:9]
    # Map the -1/1 targets to 0/1 as expected by BCELoss, as a column vector.
    labels = ((data_array[:, -1] + 1) / 2).reshape(-1, 1)

    # Convert data to PyTorch tensors
    X = torch.tensor(features).float()
    Y = torch.tensor(labels).float()

    # Fresh model, loss and optimizer for each player
    model = Net()
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.001)

    # Full-batch training loop
    for epoch in range(epochs):
        outputs = model(X)
        loss = criterion(outputs, Y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Flatten all parameters in state_dict order into one array.
    parameters_array = np.concatenate(
        [tensor.numpy().ravel() for tensor in model.state_dict().values()]
    )

    # One line per player, every value followed by a comma (including the last),
    # which is the format the clustering cell expects from 'weights.csv'.
    print(','.join(str(param) for param in parameters_array) + ',')


## Clustering the data

In [43]:
# Load the saved per-player parameters (no header row) and drop the last
# column before converting to a NumPy array for clustering.
df = pd.read_csv('weights.csv', header=None).iloc[:, :-1]
data = df.to_numpy()
data.shape

(200, 9)

We compute the inertia and the silhouette score for each value of k (the number of clusters).
The value of k with the highest silhouette score is taken as the optimal number of clusters.

In [None]:
# Candidate numbers of clusters
k_values = range(2, 21)

# Inertia and silhouette score for each candidate k, in the order of k_values
inertia_values = []
silhouette_scores = []

# Fit one KMeans model per k and record both quality measures.
for k in k_values:
    # n_init and random_state are pinned so the scores used to choose k are
    # reproducible and do not depend on the installed scikit-learn's defaults.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
    kmeans.fit(data)
    inertia_values.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(data, kmeans.labels_))

In [60]:
# Set the number of clusters observed from the graph
k = 18

# Instantiate and fit the KMeans model.
# n_init and random_state are pinned so re-running the cell reproduces the
# same centroids.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
kmeans.fit(data)

# Obtain the cluster centers (means), one row per cluster
means = kmeans.cluster_centers_

# Print the centroids rounded to 3 decimals; the message follows the value of k
print(f"Centroids for k={k}:")
print(np.round(means, 3))




Centroids for k=18:
[[ 3.010e-01  4.900e-02  6.200e-02  1.300e-02  6.880e-01 -1.570e-01
   3.300e-02  2.290e-01  1.630e-01]
 [-7.450e-01 -6.940e-01 -5.880e-01 -5.490e-01 -5.270e-01 -8.380e-01
  -4.040e-01 -1.173e+00 -6.220e-01]
 [ 2.040e-01  1.900e-02  1.400e-02  1.180e-01  1.900e-02  2.050e-01
  -2.000e-03  2.196e+00  2.500e-02]
 [ 1.657e+00 -1.000e-03 -4.040e-01  8.500e-02 -4.150e-01 -2.800e-02
  -4.210e-01 -1.000e-02  1.061e+00]
 [ 6.070e-01  1.650e-01  5.650e-01  2.020e-01  5.320e-01  2.370e-01
   8.080e-01  3.270e-01 -4.900e-01]
 [ 2.530e-01  3.050e-01  4.900e-01  2.760e-01  3.040e-01  3.080e-01
   5.380e-01 -2.059e+00  1.520e-01]
 [ 1.670e-01 -3.000e-02 -1.112e+00  3.500e-02 -4.100e-02 -1.000e-03
   1.293e+00 -3.300e-02 -2.300e-01]
 [ 3.530e-01 -1.800e-02  1.820e-01  8.900e-02  6.390e-01 -3.920e-01
  -8.490e-01  1.517e+00  9.630e-01]
 [ 1.260e-01 -1.700e-02  3.090e-01 -1.000e-03  6.810e-01  1.500e-02
   1.598e+00  4.000e-03  3.800e-02]
 [ 1.120e-01  1.490e-01  1.900e-01  1.380e-0

In [48]:
# Define the number of clusters
k = 18

# Instantiate and fit the KMeans model.
# n_init and random_state are pinned so the cluster membership printed below is
# the same on every run.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
kmeans.fit(data)

# Cluster label of each row of `data`
labels = kmeans.labels_

# Row numbers belonging to each cluster label
cluster_rows = {cluster_label: [] for cluster_label in range(k)}

for i, label in enumerate(labels):
    # Rows 0..99 map to numbers 1..100 and rows 100.. map to 102.., i.e. the
    # number 101 is skipped.
    # NOTE(review): presumably these are player ids with no player 101 - confirm.
    row_number = i + 1 if i < 100 else i + 2
    cluster_rows[label].append(row_number)

# Order the clusters by their member lists (lexicographic list comparison).
# NOTE: the index printed below is the position in this sorted order, not the
# KMeans label, so it does not identify a row of kmeans.cluster_centers_.
sorted_clusters = sorted(cluster_rows.values())

# Print the sorted cluster rows
for cluster, rows in enumerate(sorted_clusters):
    print(f"Cluster {cluster}:", rows)




Cluster 0: [1, 5, 7, 19, 27, 39, 76, 87, 125, 143, 162, 183]
Cluster 1: [2, 3, 20, 118, 126, 138, 149, 150, 190]
Cluster 2: [4, 14, 22, 23, 38, 64, 85, 96, 97, 105, 129, 148, 158, 167, 197]
Cluster 3: [6, 8, 31, 43, 61, 99, 127, 133, 139, 186, 191, 196]
Cluster 4: [9, 13, 60, 95, 104, 121, 128, 137, 175, 178, 181, 192, 193, 194]
Cluster 5: [10, 11, 28, 41, 44, 75, 106, 112, 130, 135, 153, 163, 164, 198]
Cluster 6: [12, 45, 69, 74, 80, 86, 100, 103, 154, 161, 171, 176, 199]
Cluster 7: [15, 25, 33, 57, 108, 110, 111, 115, 136, 155, 172]
Cluster 8: [16, 18, 81, 122, 160]
Cluster 9: [17, 35, 46, 47, 51, 52, 63, 70, 82, 107, 134, 165, 177, 195, 201]
Cluster 10: [21, 29, 59, 62, 84, 91, 109, 117, 132, 140]
Cluster 11: [24, 32, 50, 68, 71, 83, 114, 119, 124, 156, 173, 187]
Cluster 12: [26, 40, 48, 49, 54, 66, 90, 98, 102, 120, 123, 147, 151, 170, 184]
Cluster 13: [30, 42, 67, 77, 79, 93, 113, 159, 174, 179, 180]
Cluster 14: [34, 56, 58, 88, 144, 152, 157, 182]
Cluster 15: [36, 37, 53, 55, 65,