In [1]:
import zipfile
import os

def unzip_file(zip_path, extract_to=None):
    """Extract every member of a zip archive and report the outcome on stdout.

    Args:
        zip_path: Path of the zip archive to unpack.
        extract_to: Destination directory. When ``None`` the archive is
            unpacked into the directory that contains ``zip_path``.

    Returns:
        None. Success and failure are both reported with ``print``; no
        exception is propagated to the caller.
    """
    if extract_to is None:
        # dirname() of a bare filename such as 'rrl.zip' is '', which would make
        # the message below print an empty destination. Resolving to an absolute
        # path first yields the real containing directory (the current working
        # directory in that case) without changing where the files end up.
        extract_to = os.path.dirname(os.path.abspath(zip_path))

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
        print(f"Files extracted to: {extract_to}")
    except zipfile.BadZipFile:
        print("Error: The file is not a zip file or it is corrupted.")
    except Exception as e:
        # Deliberate best-effort: e.g. a missing archive is reported, not raised.
        print(f"An error occurred: {e}")

# Archive to unpack
zip_path = '/content/rrl.zip'

# No explicit destination: the archive is unpacked next to itself
unzip_file(zip_path, extract_to=None)

Files extracted to: /content


In [2]:
import pandas as pd
from ast import literal_eval
import numpy as np
import re

# Number of components in one tag embedding inside the flattened
# `tags_embedding` string.
EMBEDDING_DIM = 6

# One float literal: optional sign, digits with an optional fraction (or a bare
# fraction), optional exponent. The earlier pattern r"[-+]?\d*\.\d+|\d+" lost
# the sign of values such as "-1." or "-3" and split "1.2e-05" into two numbers,
# which silently corrupted the values and the reshape below.
FLOAT_PATTERN = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")

# Load the data; `tags_tokenized` is stored as the string form of a Python list.
data_df = pd.read_csv('/content/data (1).csv')
data_df['tags_tokenized'] = data_df['tags_tokenized'].apply(literal_eval)

# Every distinct tag becomes one column of the output table.
all_tags = set(tag for tags_list in data_df['tags_tokenized'] for tag in tags_list)
# Set iteration order is not stable between interpreter runs, so the column
# order is fixed explicitly; 'title' is added last and stays the final column.
tag_columns = sorted(all_tags, key=str)
data_structure = {tag: [] for tag in tag_columns}
data_structure['title'] = []

# Process each row in the DataFrame
for index, row in data_df.iterrows():
    # `tags_embedding` is text containing the embedding numbers; pull them out.
    embeddings_str = row['tags_embedding']
    float_numbers = [float(token) for token in FLOAT_PATTERN.findall(embeddings_str)]

    try:
        # One row per embedding, EMBEDDING_DIM components each.
        embeddings_array = np.array(float_numbers).reshape(-1, EMBEDDING_DIM)
    except ValueError:
        print(f"Reshaping error with data at index {index}")
        tag_means = {}
    else:
        if embeddings_array.shape[0] == 0:
            # No numbers at all: the mean would be NaN, which is written to the
            # CSV as "nan" and cannot be parsed back as a list literal later.
            print(f"No embedding values found at index {index}")
            tag_means = {}
        else:
            # NOTE(review): every tag of the row receives the SAME vector (the
            # mean over all of the row's embeddings). If each tag was meant to
            # get its own embedding row, this needs embeddings_array[i] instead
            # -- confirm the intent.
            row_mean = np.mean(embeddings_array, axis=0).tolist()
            tag_means = {tag: row_mean for tag in row['tags_tokenized']}

    # Tags absent from this row get an all-zero placeholder vector.
    for tag in tag_columns:
        data_structure[tag].append(tag_means.get(tag, [0] * EMBEDDING_DIM))

    data_structure['title'].append(row['title'])

# Create the DataFrame
final_tags_df = pd.DataFrame(data_structure)

# Save the DataFrame to a CSV file
final_tags_df.to_csv('/content/final_tags.csv', index=False)

import pandas as pd
import numpy as np

# Load the per-tag vector table written by the previous step
file_path = '/content/final_tags.csv'
data_df = pd.read_csv(file_path)

# Collapse every tag cell (the string form of a list of numbers) to its largest
# component. Note this is a max, not a mean.
for column in data_df.columns:
    if column != 'title':  # Assuming 'title' is the only non-tag column
        # literal_eval only accepts Python literals, so a malformed or hostile
        # cell raises instead of being executed as code the way eval() would.
        data_df[column] = data_df[column].apply(lambda x: np.max(literal_eval(x)))

# Save the modified DataFrame back to a CSV file
modified_file_path = '/content/max_tags.csv'
data_df.to_csv(modified_file_path, index=False)

modified_file_path

'/content/max_tags.csv'

In [3]:
import pandas as pd

def process_and_save_facial_features(input_csv_path, output_dir='/content'):
    """Export a feature table as a header-less CSV plus `.info` / `.data` files.

    The 'title' column of the input is moved to the end under the name
    'Label'; every other column is described as a continuous feature.

    Args:
        input_csv_path: CSV file to read; it must contain a 'title' column.
        output_dir: Existing directory that receives the three output files.
            Defaults to '/content', the location that was previously hard-coded.

    Returns:
        Tuple ``(updated_csv_path, updated_info_path, data_file_path)``.

    Raises:
        KeyError: If the input has no 'title' column.
    """
    df = pd.read_csv(input_csv_path)

    # Re-home the label: append it as 'Label' (last column), then drop 'title'.
    df = df.assign(Label=df['title']).drop(columns='title')

    # Header-less copy of the table.
    updated_csv_path = os.path.join(output_dir, 'facial_features_updated_no_header.csv')
    df.to_csv(updated_csv_path, header=False, index=False)

    # .info file: one "<name> continuous" line per feature, then the class line
    # and the label position (-1 = last column).
    info_content = [f"{column} continuous" for column in df.columns if column != 'Label']
    info_content.append("class discrete")
    info_content.append("LABEL_POS -1")

    updated_info_path = os.path.join(output_dir, 'tic-tac-toe.info')
    with open(updated_info_path, 'w') as file:
        file.write('\n'.join(info_content))

    # .data file: one comma-joined line per row.
    # NOTE(review): dropna() removes missing cells from the row, so a row with
    # a NaN ends up with fewer fields than the others -- confirm this is wanted.
    data_lines = df.apply(lambda row: ','.join(row.dropna().astype(str)), axis=1).tolist()

    data_file_path = os.path.join(output_dir, 'tic-tac-toe.data')
    with open(data_file_path, 'w') as file:
        file.write('\n'.join(data_lines))

    return updated_csv_path, updated_info_path, data_file_path

# Export the max-pooled tag table produced earlier
input_csv_path = '/content/max_tags.csv'
(
    updated_csv_path,
    updated_info_path,
    data_file_path,
) = process_and_save_facial_features(input_csv_path)

In [6]:
import pandas as pd

def process_and_save_facial_features(input_csv_path, output_dir='/content'):
    """Export a table whose last three columns are labels as CSV / `.info` / `.data`.

    Each value in the last three columns is converted to text and rewritten
    as exactly three comma-separated words (see ``adjust_title``). All other
    columns are described as continuous features.

    NOTE(review): in the table written to '/content/max_tags.csv' only the
    final column is 'title'; confirm that the two preceding tag columns are
    really meant to be treated as labels.

    Args:
        input_csv_path: CSV file to read; it needs at least three columns.
        output_dir: Existing directory that receives the three output files.
            Defaults to '/content', the location that was previously hard-coded.

    Returns:
        Tuple ``(updated_csv_path, updated_info_path, data_file_path)``.

    Raises:
        IndexError: If the input has fewer than three columns.
    """
    def adjust_title(title):
        """Return exactly three comma-joined words taken from ``title``.

        Longer titles are cut to their first three words; shorter ones are
        cycled ('a b' -> 'a,b,a'). A title with no words yields ',,' (three
        empty fields).
        """
        if not isinstance(title, str):
            title = ""  # Replace non-string values (e.g., NaN) with an empty string
        # With no words there is nothing to repeat. The previous while-loop
        # never terminated in that case, so use one empty placeholder instead.
        words = title.split() or [""]
        # Repeating the list three times always provides at least three items.
        return ','.join((words * 3)[:3])

    # Load the CSV file
    df = pd.read_csv(input_csv_path)
    print(f"Initial column count: {len(df.columns)}")  # Ensure the initial column count is as expected

    # Assuming the last 3 columns are labels
    label_columns = [df.columns[-1], df.columns[-2], df.columns[-3]]
    for label in label_columns:
        # astype(str) turns numbers and NaN into text ('nan' for NaN) first.
        df[label] = df[label].astype(str).apply(adjust_title)

    print(f"Column count after label adjustments: {len(df.columns)}")  # Verify column count remains consistent

    # Header-less copy of the adjusted table (all columns are kept).
    updated_csv_path = os.path.join(output_dir, 'facial_features_updated_no_header.csv')
    df.to_csv(updated_csv_path, header=False, index=False)

    # .info file: one "<name> continuous" line per feature, three class lines,
    # then the label positions.
    info_content = [f"{column} continuous" for column in df.columns if column not in label_columns]
    info_content.extend(["class discrete"] * 3)  # Assuming three label columns
    info_content.append("LABEL_POS -3, -2, -1")  # Indicate positions of labels

    updated_info_path = os.path.join(output_dir, 'tic-tac-toe.info')
    with open(updated_info_path, 'w') as file:
        file.write('\n'.join(info_content))

    # .data file: one comma-joined line per row. Each label cell already holds
    # commas, so it contributes three fields to the line.
    data_lines = df.apply(lambda row: ','.join(row.dropna().astype(str)), axis=1).tolist()

    data_file_path = os.path.join(output_dir, 'tic-tac-toe.data')
    with open(data_file_path, 'w') as file:
        file.write('\n'.join(data_lines))

    return updated_csv_path, updated_info_path, data_file_path

# Re-run the export with the three-label variant defined above
input_csv_path = '/content/max_tags.csv'
(
    updated_csv_path,
    updated_info_path,
    data_file_path,
) = process_and_save_facial_features(input_csv_path)

Initial column count: 330
Column count after label adjustments: 330


In [7]:
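# Train RRL with NLAFs (--nlaf) on the files exported above as /content/tic-tac-toe.data and .info.
# The "tic-tac-toe" name is only a file name: the contents are the tag features from max_tags.csv.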
%cd /content/rrl
!python3 /content/rrl/experiment.py -d /content/tic-tac-toe -bs 16 -s 1@64 -e150 -lrde 200 -lr 0.01 -ki 0 -i 0 -wd 0.0001 --nlaf --alpha 0.45 --beta 3 --gamma 3 --temp 0.001 --print_rule &

/content/rrl
2024-12-19 17:07:51.919963: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-19 17:07:51.940981: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-19 17:07:51.947395: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-19 17:07:51.961941: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-12-19 17:07:57.078637: E ex