In [None]:
# Colab-only step: mount Google Drive at /content/drive so that the data
# files referenced under /content/drive/MyDrive can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Directory on the mounted Drive that holds the input files. Change this one
# constant to point the notebook at a different location.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/cognative'

# Names of the data files to load from DATA_DIR.
DATA_FILES = [
    'scamdata_10_16Oct2018090735_20_F.dat',
    'scamdata_23_29Oct2018120805_18_F.dat',
    'scamdata_36_19Nov2018090252_18_M.dat',
    'scamdata_48_29Nov2018145344_19_M.dat',
    'scamdata_8_15Oct2018090007_18_F.dat',
]

# Full paths handed to the loader (same strings as before, built from the parts).
file_paths = [f'{DATA_DIR}/{file_name}' for file_name in DATA_FILES]

In [None]:
import pandas as pd

def load_and_preprocess_data(file_paths):
    """Read each comma-separated file and stack them into one DataFrame.

    Args:
        file_paths: Iterable of paths readable by ``pd.read_csv``.

    Returns:
        A single DataFrame holding the rows of every file, in the order the
        paths were given, with a fresh 0..n-1 index.

    Raises:
        ValueError: If ``file_paths`` is empty (raised by ``pd.concat``).
    """
    frames = [pd.read_csv(path, sep=',') for path in file_paths]
    # Every file is read with its own index starting at 0; ignore_index=True
    # renumbers the combined rows so index labels are not duplicated.
    return pd.concat(frames, ignore_index=True)

def analyze_user_responses(data):
    """Count responses per user, overall and split by email attribute.

    Args:
        data: DataFrame with the columns 'userId', 'category', 'type'
            and 'hasAtt'.

    Returns:
        A 4-tuple ``(response_counts, category_bias, type_bias,
        attachment_bias)``: the number of rows per 'userId', followed by
        three tables (one each for 'category', 'type', 'hasAtt') whose rows
        are the attribute values, whose columns are user ids, and whose
        cells are row counts with missing combinations filled with 0.
    """
    def _user_counts_by(column):
        # Rows: values of `column`; columns: userId; cells: occurrence counts.
        per_group = data.groupby(column)['userId'].value_counts()
        table = per_group.unstack()
        return table.fillna(0)

    per_user = data['userId'].value_counts()
    by_category, by_type, by_attachment = (
        _user_counts_by(column) for column in ('category', 'type', 'hasAtt')
    )
    return per_user, by_category, by_type, by_attachment

In [None]:
# Read every file listed in `file_paths` and combine the rows into a single
# DataFrame; all later cells work on (and modify) this `all_data` frame.
all_data = load_and_preprocess_data(file_paths)

In [None]:
# Replace the 'category' and 'type' values with integer codes assigned in
# order of first appearance.
all_data['category'], _ = pd.factorize(all_data['category'])
all_data['type'], _ = pd.factorize(all_data['type'])
all_data['hasAtt'] = all_data['hasAtt'].astype(int)
# Bucket reaction time into three quantile-based bins labelled 0, 1, 2.
all_data['reactTime'] = pd.qcut(all_data['reactTime'], 3, labels=False)
# Pack the four encoded features into one numeric state id. Plain column
# arithmetic yields the same values as a row-by-row apply without calling a
# Python lambda once per row.
# NOTE(review): ids are only collision-free while 'type' has at most 10
# distinct values and 'hasAtt' stays below 10 -- confirm against the data.
# NOTE(review): this cell overwrites its own inputs, so re-running it without
# reloading `all_data` re-encodes already-encoded columns.
all_data['state'] = (
    all_data['category'] * 1000
    + all_data['type'] * 100
    + all_data['hasAtt'] * 10
    + all_data['reactTime']
)

In [None]:
# Seed NumPy's global random generator so that every np.random draw made
# after this cell is repeatable when the notebook is re-run from the top.
SEED = 42
np.random.seed(SEED)

num_states = all_data['state'].nunique()  # one Q-table row per distinct state id
num_actions = 4  # NOTE(review): the meaning of the four actions is not shown here -- document it

# Initialize Q-table (rows: states, columns: actions), all values start at 0
Q = np.zeros((num_states, num_actions))

# Learning parameters
alpha = 0.1  # Learning rate
gamma = 0.6  # Discount factor
epsilon = 0.1  # Exploration rate


In [None]:
def choose_action(state_index):
    """Pick an action for a state with an epsilon-greedy rule.

    Reads the module-level ``epsilon``, ``num_actions`` and ``Q``.

    Args:
        state_index: Row of ``Q`` describing the current state.

    Returns:
        With probability ``epsilon`` a uniformly random action index,
        otherwise the index of the largest value in ``Q[state_index]``
        (the first one when several are tied).
    """
    should_explore = np.random.rand() < epsilon
    if not should_explore:
        # Exploit: take the best-valued action seen so far for this state.
        return np.argmax(Q[state_index])
    # Explore: ignore the table and try any action.
    return np.random.choice(num_actions)

def update_q_table(state_index, action, reward, next_state_index):
    """Apply one Q-learning update to the module-level table ``Q`` in place.

    The entry for (state_index, action) is moved by a fraction ``alpha`` of
    the difference between the target ``reward + gamma * max(Q[next_state_index])``
    and its current value.

    Args:
        state_index: Row of ``Q`` for the state the action was taken in.
        action: Column of ``Q`` for the action taken.
        reward: Reward received for that action.
        next_state_index: Row of ``Q`` for the state reached afterwards.
    """
    current_value = Q[state_index, action]
    best_next_value = np.max(Q[next_state_index])
    td_error = reward + gamma * best_next_value - current_value
    Q[state_index, action] = current_value + alpha * td_error


In [None]:
# Setup for the simulated transitions and rewards: give every distinct state
# id a position (0, 1, 2, ...) in order of first appearance, used as its row
# in the Q-table.
unique_states = all_data['state'].unique()
state_to_index = dict(zip(unique_states, range(len(unique_states))))
num_episodes = 1000  # For the example, adjust based on your needs

In [None]:
# Convert the inputs of np.random.choice to arrays once, instead of letting it
# re-convert the Series and the reward list on every single draw.
state_pool = all_data['state'].to_numpy()
reward_pool = np.array([1, -1])

for _ in range(num_episodes):
    # Each episode is one simulated step: a random observed state, an
    # epsilon-greedy action, a random +1/-1 reward and a random next state.
    # The order of the random draws is kept exactly as before.
    state = np.random.choice(state_pool)
    state_index = state_to_index[state]
    action = choose_action(state_index)
    reward = np.random.choice(reward_pool)
    next_state = np.random.choice(state_pool)
    next_state_index = state_to_index[next_state]
    update_q_table(state_index, action, reward, next_state_index)


In [None]:
# Example output: a heading line followed by the full learned Q-table.
print("Q-table after training:", Q, sep="\n")

Q-table after training:
[[-0.08191521 -0.09561849 -0.094      -0.08264965]
 [-0.094       0.29941612  0.          0.        ]
 [-0.14053295 -0.08784773 -0.09422025 -0.1       ]
 [-0.19        0.05189442 -0.10255192 -0.1       ]
 [ 0.28632796  0.          0.          0.        ]
 [-0.1        -0.07792525  0.          0.        ]
 [-0.01813658  0.          0.          0.        ]
 [-0.026866   -0.1         0.0202889  -0.08374086]
 [-0.1761484  -0.16473191 -0.05886366  0.48356465]
 [-0.09924541  0.14491821  0.          0.        ]
 [-0.08870793  0.194041    0.          0.        ]
 [-0.01346156 -0.10355433 -0.0455277  -0.18590356]
 [-0.09936069 -0.094       0.          0.        ]
 [-0.15751764 -0.08443114 -0.04553951  0.41984725]
 [-0.1        -0.08604004  0.         -0.1       ]
 [-0.005086   -0.1        -0.07442103  0.        ]
 [ 0.265924    0.          0.06073332  0.        ]
 [-0.0993004  -0.1         0.          0.        ]
 [ 0.21120474 -0.09955576 -0.094      -0.09545554]
 [ 0.1 