In [1]:
# Mount Google Drive at /content/drive. This relies on the google.colab
# package, so the cell only works inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd

In [3]:
# All input .dat files live in the same Drive folder, so each full path is
# assembled from the shared directory plus the individual file name.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/cognative'
file_paths = [
    DATA_DIR + '/' + file_name
    for file_name in (
        'scamdata_10_16Oct2018090735_20_F.dat',
        'scamdata_23_29Oct2018120805_18_F.dat',
        'scamdata_36_19Nov2018090252_18_M.dat',
        'scamdata_48_29Nov2018145344_19_M.dat',
        'scamdata_8_15Oct2018090007_18_F.dat',
    )
]

In [4]:
import pandas as pd

def load_and_preprocess_data(file_paths):
    """Read every comma-separated file in ``file_paths`` into one DataFrame.

    Despite the name, no preprocessing is done here: each file is parsed with
    ``pd.read_csv(..., sep=',')`` and the resulting frames are stacked
    row-wise in the order given.

    Args:
        file_paths: Iterable of paths (or file-like objects) that
            ``pd.read_csv`` can read.

    Returns:
        A DataFrame holding the rows of all files, with a fresh 0..n-1 index
        so that every row label is unique across files.

    Raises:
        ValueError: If ``file_paths`` is empty (``pd.concat`` refuses to
            concatenate an empty list).
    """
    frames = [pd.read_csv(path, sep=',') for path in file_paths]
    # Each file's own index restarts at 0; without ignore_index the combined
    # frame would carry duplicate labels, which makes label-based lookups
    # ambiguous and breaks index-aligned assignments.
    return pd.concat(frames, ignore_index=True)

In [5]:
# Read every file listed in file_paths and concatenate them into a single
# DataFrame (the loader only reads and stacks the files; it does not transform them).
all_data = load_and_preprocess_data(file_paths)

In [6]:
# NOTE(review): this cell overwrites its input columns in place, so running it
# a second time re-encodes already-encoded values (e.g. reactTime is re-binned
# from its bin labels). Re-run the load cell first if this cell must be repeated.

# Replace the two label columns with integer codes; factorize returns
# (codes, uniques) and only the codes are kept.
all_data['category'] = pd.factorize(all_data['category'])[0]
all_data['type'] = pd.factorize(all_data['type'])[0]
all_data['hasAtt'] = all_data['hasAtt'].astype(int)
# Split reaction time into three quantile-based bins, stored as bin numbers.
all_data['reactTime'] = pd.qcut(all_data['reactTime'], 3, labels=False)

# Pack the four encoded features into one number, one decimal position each.
# This is computed column-wise rather than with a row-by-row apply(axis=1);
# the resulting values are the same but it avoids a Python-level loop over rows.
# NOTE(review): the packing is only collision-free while 'type', 'hasAtt' and
# 'reactTime' each stay below 10 - confirm against the data.
all_data['state'] = (
    all_data['category'] * 1000
    + all_data['type'] * 100
    + all_data['hasAtt'] * 10
    + all_data['reactTime']
)

In [7]:
# Q-learning hyperparameters.
alpha, gamma, epsilon = 0.1, 0.6, 0.1  # learning rate, discount factor, exploration rate

# Table dimensions: one row per distinct value of the 'state' column,
# one column per action.
num_actions = 2
num_states = all_data['state'].nunique()

# Every state/action value starts at zero.
Q = np.zeros(shape=(num_states, num_actions))


In [8]:
def calculate_reward(user_response, model_prediction):
    """Score a model prediction against the user's response.

    The response is first collapsed to a binary decision: a response of 1 or 2
    counts as "safe" (0); any other value counts as "scam" (1).

    Args:
        user_response: The user's rating (1-2 are treated as safe, 3-4 as scam).
        model_prediction: The model's binary decision, compared for equality
            with the collapsed user decision.

    Returns:
        1 when the prediction equals the user's decision, otherwise -1.
    """
    judged_safe = user_response in (1, 2)
    user_decision = 0 if judged_safe else 1
    if model_prediction == user_decision:
        return 1
    return -1

In [9]:

def choose_action(state_index):
    """Pick an action for ``state_index`` with an epsilon-greedy rule.

    Reads the module-level ``epsilon``, ``num_actions`` and ``Q``.

    Args:
        state_index: Row of ``Q`` to consult.

    Returns:
        With probability ``epsilon`` a uniformly random action index drawn
        from ``range(num_actions)``; otherwise the index of the largest value
        in ``Q[state_index]`` (the first such index on ties).
    """
    should_explore = np.random.rand() < epsilon
    if should_explore:
        # Explore: ignore the table and sample an action at random.
        return np.random.choice(num_actions)
    # Exploit: take the action with the highest learned value.
    return np.argmax(Q[state_index])


def update_q_table(state_index, action, reward, next_state_index):
    """Apply one Q-learning update to the module-level table ``Q`` in place.

    Uses the module-level ``alpha`` (learning rate) and ``gamma`` (discount
    factor).

    Args:
        state_index: Row of the entry being updated.
        action: Column of the entry being updated.
        reward: Reward received for taking ``action`` in ``state_index``.
        next_state_index: Row whose maximum value is used as the bootstrap
            estimate of future return.
    """
    best_next_value = np.max(Q[next_state_index])
    current_value = Q[state_index, action]
    # Move the current estimate a fraction alpha towards the bootstrapped target.
    td_error = reward + gamma * best_next_value - current_value
    Q[state_index, action] = current_value + alpha * td_error

In [10]:
# Step 4: Simulate training
# NOTE: this loop never reads all_data. States, next states and user responses
# are all drawn uniformly at random, so the Q-table is fitted to simulated
# interactions only.

# Seed the global NumPy RNG (also used inside choose_action) so that a
# Restart & Run All reproduces the printed Q-table.
SEED = 42
np.random.seed(SEED)

num_episodes = 1000
for episode in range(num_episodes):
    # Each "episode" is a single simulated transition.
    state_index = np.random.randint(0, num_states)

    action = choose_action(state_index)

    # Raw simulated response on the 1-4 scale; calculate_reward collapses it
    # to a binary safe/scam decision before comparing it with the action.
    user_decision = np.random.choice([1, 2, 3, 4])

    reward = calculate_reward(user_decision, action)

    next_state_index = np.random.randint(0, num_states)

    # Update Q-table
    update_q_table(state_index, action, reward, next_state_index)

print("Q-table after training:")
print(Q)

Q-table after training:
[[-1.67724400e-01  1.24949946e-01]
 [-1.84600000e-01 -1.38014338e-01]
 [-1.84000000e-01 -1.00000000e-01]
 [-1.00000000e-01  0.00000000e+00]
 [ 1.00000000e-02 -4.34179600e-02]
 [-1.00000000e-01 -9.40000000e-02]
 [-3.44239600e-02 -1.00000000e-01]
 [ 1.28018217e-02 -3.65491600e-02]
 [ 1.06574560e-01 -1.87750530e-02]
 [ 0.00000000e+00  2.65831600e-01]
 [-1.00000000e-01  1.90000000e-01]
 [ 1.90000000e-01  0.00000000e+00]
 [ 2.66707456e-01  0.00000000e+00]
 [-1.00000000e-01  1.90000000e-01]
 [ 1.01551600e-01 -1.00000000e-01]
 [-1.04140000e-01  7.96483184e-02]
 [-1.73554924e-01 -1.06000000e-01]
 [-9.37322680e-02  1.03111902e-01]
 [-1.05618400e-01  9.88000747e-02]
 [-2.50774368e-01 -1.06000000e-01]
 [-1.00000000e-02  9.61701137e-02]
 [-1.05618400e-01 -8.40445600e-02]
 [-1.00000000e-01  2.13402382e-01]
 [ 7.14080000e-02  0.00000000e+00]
 [-1.14598960e-01  2.74354953e-02]
 [-1.00600000e-01  3.66583984e-02]
 [-4.00000000e-03 -8.89600000e-02]
 [-7.89467560e-02 -1.84600000e-