# Academy CSV Files

In [41]:
import boto3
import pandas as pd
import json
import re 

In [42]:
s3 = boto3.client('s3')

In [43]:
def list_all_objects(bucket, prefix):
    all_objects = []
    continuation_token = None
    
    while True:
        if continuation_token:
            response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=continuation_token)
        else:
            response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        
        if 'Contents' in response:
            all_objects.extend([obj['Key'] for obj in response['Contents']])
        
        if not response.get('NextContinuationToken'):
            break
        
        continuation_token = response['NextContinuationToken']
    
    return all_objects

In [44]:
def list_csv_files(bucket, prefix):
    csv_files = []
    response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
    if 'Contents' in response:
        for obj in response['Contents']:
            if obj['Key'].endswith('.csv'):
                csv_files.append(obj['Key'])
    return csv_files

In [45]:
def read_csv_from_s3(bucket, file_key):
    response = s3.get_object(Bucket=bucket, Key=file_key)
    df = pd.read_csv(response['Body'])
    return df

In [46]:
def convert_floats_to_ints(df):
    for column in df.columns:
        if df[column].dtype == 'float64':
            df[column] = df[column].fillna(0).astype(int)
    return df

In [47]:
bucket_name = 'data-402-final-project'
prefix = 'Academy/'

In [48]:
csv_files = list_csv_files(bucket_name, prefix)

In [49]:
if csv_files:
    # View contents of the first CSV file
    first_csv_file = csv_files[0]
    academy_csv_files = read_csv_from_s3(bucket_name, first_csv_file)
    print("Contents of the first CSV file:")
    print(academy_csv_files)
    
    # Convert floats to ints
    academy_csv_files = convert_floats_to_ints(academy_csv_files)
    print("\nContents of the first CSV file after converting floats to ints:")
    print(academy_csv_files)
else:
    print("No CSV files found in the Academy folder.")

Contents of the first CSV file:
               name       trainer  Analytic_W1  Independent_W1  Determined_W1  Professional_W1  Studious_W1  Imaginative_W1  Analytic_W2  Independent_W2  Determined_W2  Professional_W2  Studious_W2  Imaginative_W2  Analytic_W3  Independent_W3  Determined_W3  Professional_W3  Studious_W3  Imaginative_W3  Analytic_W4  Independent_W4  Determined_W4  Professional_W4  Studious_W4  Imaginative_W4  Analytic_W5  Independent_W5  Determined_W5  Professional_W5  Studious_W5  Imaginative_W5  Analytic_W6  Independent_W6  Determined_W6  Professional_W6  Studious_W6  Imaginative_W6  Analytic_W7  Independent_W7  Determined_W7  Professional_W7  Studious_W7  Imaginative_W7  Analytic_W8  Independent_W8  Determined_W8  Professional_W8  Studious_W8  Imaginative_W8
0   Quintus Penella  Gregor Gomez            1               2              2                1            2               2          NaN             NaN            NaN              NaN          NaN             NaN 

# Talent JSON Files

In [50]:
def get_files_by_extension(bucket, prefix, extension):
    all_files = list_all_objects(bucket, prefix)
    filtered_files = [file for file in all_files if file.endswith(extension)]
    return filtered_files

In [51]:
def show_json_content(bucket, file_key):
    # Retrieve the JSON file from S3
    response = s3.get_object(Bucket=bucket, Key=file_key)
    # Read the JSON file content
    json_content = json.loads(response['Body'].read())
    print(json.dumps(json_content, indent=4))

In [52]:
def load_json_files(bucket, json_files):
    data_list = []
    for file_key in json_files:
        response = s3.get_object(Bucket=bucket, Key=file_key)
        json_content = json.loads(response['Body'].read())
        data_list.append(json_content)
    
    return pd.json_normalize(data_list)

In [53]:
bucket_name = 'data-402-final-project'
prefix = 'Talent/'

In [54]:
json_files = get_files_by_extension(bucket_name, prefix, '.json')

In [55]:
print("JSON files found:", json_files)

JSON files found: ['Talent/10383.json', 'Talent/10384.json', 'Talent/10385.json', 'Talent/10386.json', 'Talent/10387.json', 'Talent/10388.json', 'Talent/10389.json', 'Talent/10390.json', 'Talent/10391.json', 'Talent/10392.json', 'Talent/10393.json', 'Talent/10394.json', 'Talent/10395.json', 'Talent/10396.json', 'Talent/10397.json', 'Talent/10398.json', 'Talent/10399.json', 'Talent/10400.json', 'Talent/10401.json', 'Talent/10402.json', 'Talent/10403.json', 'Talent/10404.json', 'Talent/10405.json', 'Talent/10406.json', 'Talent/10407.json', 'Talent/10408.json', 'Talent/10409.json', 'Talent/10410.json', 'Talent/10411.json', 'Talent/10412.json', 'Talent/10413.json', 'Talent/10414.json', 'Talent/10415.json', 'Talent/10416.json', 'Talent/10417.json', 'Talent/10418.json', 'Talent/10419.json', 'Talent/10420.json', 'Talent/10421.json', 'Talent/10422.json', 'Talent/10423.json', 'Talent/10424.json', 'Talent/10425.json', 'Talent/10426.json', 'Talent/10427.json', 'Talent/10428.json', 'Talent/10429.j

In [56]:
if json_files:
    print("\nContents of the first JSON file:")
    show_json_content(bucket_name, json_files[0])
else:
    print("No JSON files found.")


Contents of the first JSON file:
{
    "name": "Stillmann Castano",
    "date": "22/08/2019",
    "tech_self_score": {
        "C#": 6,
        "Java": 5,
        "R": 2,
        "JavaScript": 2
    },
    "strengths": [
        "Charisma"
    ],
    "weaknesses": [
        "Distracted",
        "Impulsive",
        "Introverted"
    ],
    "self_development": "Yes",
    "geo_flex": "Yes",
    "financial_support_self": "Yes",
    "result": "Pass",
    "course_interest": "Business"
}


In [57]:
if json_files:
    json_df = load_json_files(bucket_name, json_files)
    
    # Convert date field to date object, handle errors and coerce invalid formats
    json_df['date'] = pd.to_datetime(json_df['date'], format='%d/%m/%Y', errors='coerce').dt.date
    
    # Convert tech self-scores to integers
    tech_score_cols = [col for col in json_df.columns if col.startswith('tech_self_score')]
    json_df[tech_score_cols] = json_df[tech_score_cols].apply(pd.to_numeric, errors='coerce').fillna(0).astype(int)
    
    print("JSON DataFrame:")
    print(json_df.head())
else:
    print("No JSON files found.")

JSON DataFrame:
                name        date                            strengths                            weaknesses self_development geo_flex financial_support_self result course_interest  tech_self_score.C#  tech_self_score.Java  tech_self_score.R  tech_self_score.JavaScript  tech_self_score.Python  tech_self_score.C++  tech_self_score.Ruby  tech_self_score.SPSS  tech_self_score.PHP
0  Stillmann Castano  2019-08-22                           [Charisma]  [Distracted, Impulsive, Introverted]              Yes      Yes                    Yes   Pass        Business                   6                     5                  2                           2                       0                    0                     0                     0                    0
1    Hilary Willmore  2019-08-01  [Patient, Curious, Problem Solving]    [Overbearing, Chatty, Indifferent]               No      Yes                    Yes   Fail            Data                   4                     2     

In [58]:
if 'json_df' in locals():
    print(json_df)
else:
    print("JSON DataFrame not created.")

                               name        date                                         strengths                                      weaknesses self_development geo_flex financial_support_self result course_interest  tech_self_score.C#  tech_self_score.Java  tech_self_score.R  tech_self_score.JavaScript  tech_self_score.Python  tech_self_score.C++  tech_self_score.Ruby  tech_self_score.SPSS  tech_self_score.PHP
0                 Stillmann Castano  2019-08-22                                        [Charisma]            [Distracted, Impulsive, Introverted]              Yes      Yes                    Yes   Pass        Business                   6                     5                  2                           2                       0                    0                     0                     0                    0
1                   Hilary Willmore  2019-08-01               [Patient, Curious, Problem Solving]              [Overbearing, Chatty, Indifferent]               No    

In [59]:
talent_json_files = json_df

# Talent TXT Files

In [60]:
def list_txt_files(bucket, prefix):
    all_files = list_all_objects(bucket, prefix)
    txt_files = [file for file in all_files if file.endswith('.txt')]
    return txt_files

# Replace 'your-bucket-name' and 'your-prefix' with your bucket and prefix
bucket_name = 'data-402-final-project'
prefix = 'Talent/'

txt_files = list_txt_files(bucket_name, prefix)

print("List of .txt file titles:")
for txt_file in txt_files:
    print(txt_file)

def get_txt_file_contents(bucket, key):
    # Retrieve the object from S3
    response = s3.get_object(Bucket=bucket, Key=key)
    # Read the content of the file
    content = response['Body'].read().decode('utf-8')
    return content

# Replace 'your-bucket-name' with the name of your bucket
bucket_name = 'data-402-final-project'
# Replace 'your-file-key.txt' with the key of the file you want to read
file_key = 'Talent/Sparta Day 15 October 2019.txt'

# Get the contents of the specified .txt file
file_contents = get_txt_file_contents(bucket_name, file_key)

# Print the contents of the file
print(file_contents)



List of .txt file titles:
Talent/Sparta Day 1 August 2019.txt
Talent/Sparta Day 1 May 2019.txt
Talent/Sparta Day 1 October 2019.txt
Talent/Sparta Day 10 April 2019.txt
Talent/Sparta Day 10 December 2019.txt
Talent/Sparta Day 10 January 2019.txt
Talent/Sparta Day 10 July 2019.txt
Talent/Sparta Day 10 October 2019.txt
Talent/Sparta Day 10 September 2019.txt
Talent/Sparta Day 11 April 2019.txt
Talent/Sparta Day 11 December 2019.txt
Talent/Sparta Day 11 July 2019.txt
Talent/Sparta Day 11 June 2019.txt
Talent/Sparta Day 11 September 2019.txt
Talent/Sparta Day 12 December 2019.txt
Talent/Sparta Day 12 February 2019.txt
Talent/Sparta Day 12 June 2019.txt
Talent/Sparta Day 12 March 2019.txt
Talent/Sparta Day 12 November 2019.txt
Talent/Sparta Day 12 September 2019.txt
Talent/Sparta Day 13 August 2019.txt
Talent/Sparta Day 13 February 2019.txt
Talent/Sparta Day 13 June 2019.txt
Talent/Sparta Day 13 March 2019.txt
Talent/Sparta Day 13 November 2019.txt
Talent/Sparta Day 14 August 2019.txt
Talent

In [61]:
def parse_txt_content(content):
    lines = content.strip().split('\n')
    date = lines[0]
    academy = lines[1]
    data = []
    for line in lines[2:]:
        match = re.match(r'(.+?) -\s+Psychometrics:\s+(\d+)/100,\s+Presentation:\s+(\d+)/32', line)
        if match:
            name = match.group(1).strip().title()
            psychometrics = match.group(2).strip()
            presentation = match.group(3).strip()
            data.append([date, academy, name, psychometrics, presentation])
    return data

def combine_txt_files(bucket, prefix):
    txt_files = list_txt_files(bucket, prefix)
    all_data = []
    for txt_file in txt_files:
        content = get_txt_file_contents(bucket, txt_file)
        file_data = parse_txt_content(content)
        all_data.extend(file_data)
    
    df = pd.DataFrame(all_data, columns=['date', 'academy', 'name', 'psychometric_score', 'presentation_score'])
    return df



In [62]:
# Replace 'your-bucket-name' with the name of your bucket
bucket_name = 'data-402-final-project'
prefix = 'Talent/'

# Combine all txt files into a single DataFrame
talent_txt_files = combine_txt_files(bucket_name, prefix)

# Display the combined DataFrame
print(talent_txt_files)

# Save the combined DataFrame to a CSV file (optional)
talent_txt_files.to_csv('combined_data.csv', index=False)

                               date               academy                           name psychometric_score presentation_score
0          Thursday 1 August 2019\r  Birmingham Academy\r                Hilary Willmore                 51                 19
1          Thursday 1 August 2019\r  Birmingham Academy\r                    Orly Lorens                 51                 19
2          Thursday 1 August 2019\r  Birmingham Academy\r                 Alvie Bleackly                 55                 16
3          Thursday 1 August 2019\r  Birmingham Academy\r                  Deck Itzchaki                 59                 21
4          Thursday 1 August 2019\r  Birmingham Academy\r                   Wilt Penritt                 66                 25
5          Thursday 1 August 2019\r  Birmingham Academy\r                 Gianna Clevely                 61                 16
6          Thursday 1 August 2019\r  Birmingham Academy\r                    Yvor Phalip                 59    

In [63]:
# Clean the combined DataFrame
talent_txt_files['date'] = talent_txt_files['date'].str.strip()
talent_txt_files['academy'] = talent_txt_files['academy'].str.strip()
talent_txt_files['name'] = talent_txt_files['name'].str.strip()
talent_txt_files['psychometric_score'] = talent_txt_files['psychometric_score'].astype(int)
talent_txt_files['presentation_score'] = talent_txt_files['presentation_score'].astype(int)

# Display the combined DataFrame
print(talent_txt_files)

# Save the combined DataFrame to a CSV file (optional)
talent_txt_files.to_csv('talent_txt_files.csv', index=False)

                             date             academy                           name  psychometric_score  presentation_score
0          Thursday 1 August 2019  Birmingham Academy                Hilary Willmore                  51                  19
1          Thursday 1 August 2019  Birmingham Academy                    Orly Lorens                  51                  19
2          Thursday 1 August 2019  Birmingham Academy                 Alvie Bleackly                  55                  16
3          Thursday 1 August 2019  Birmingham Academy                  Deck Itzchaki                  59                  21
4          Thursday 1 August 2019  Birmingham Academy                   Wilt Penritt                  66                  25
5          Thursday 1 August 2019  Birmingham Academy                 Gianna Clevely                  61                  16
6          Thursday 1 August 2019  Birmingham Academy                    Yvor Phalip                  59                  18


In [64]:
# Set pandas display options to show more columns and rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000)

In [65]:
# Check for missing values in the combined DataFrame
missing_values = talent_txt_files.isnull().sum()

# Print the missing values
print("Missing values in talent_txt_files:")
print(missing_values)

Missing values in talent_txt_files:
date                  0
academy               0
name                  0
psychometric_score    0
presentation_score    0
dtype: int64


In [66]:
# Convert the 'date' column to date format
talent_txt_files['date'] = pd.to_datetime(talent_txt_files['date']).dt.date

# Verify the changes
print(talent_txt_files.dtypes)

date                  object
academy               object
name                  object
psychometric_score     int32
presentation_score     int32
dtype: object


In [67]:
# Iterate through the DataFrame and print each row
for index, row in talent_txt_files.iterrows():
    print(f"{row['date']}  {row['academy']}  {row['name']}  {row['psychometric_score']}  {row['presentation_score']}")

2019-08-01  Birmingham Academy  Hilary Willmore  51  19
2019-08-01  Birmingham Academy  Orly Lorens  51  19
2019-08-01  Birmingham Academy  Alvie Bleackly  55  16
2019-08-01  Birmingham Academy  Deck Itzchaki  59  21
2019-08-01  Birmingham Academy  Wilt Penritt  66  25
2019-08-01  Birmingham Academy  Gianna Clevely  61  16
2019-08-01  Birmingham Academy  Yvor Phalip  59  18
2019-08-01  Birmingham Academy  Marielle Knivett  52  18
2019-08-01  Birmingham Academy  Florri Loughton  63  28
2019-08-01  Birmingham Academy  Walden Gidden  49  25
2019-08-01  Birmingham Academy  Ingunna Adin  60  20
2019-08-01  Birmingham Academy  Janaya Mawford  52  18
2019-08-01  Birmingham Academy  Cornall Offer  56  26
2019-08-01  Birmingham Academy  Brnaby Bownes  59  26
2019-08-01  Birmingham Academy  Sherline Cudihy  52  16
2019-08-01  Birmingham Academy  Anallise Scathard  62  13
2019-08-01  Birmingham Academy  Benson Labuschagne  64  13
2019-08-01  Birmingham Academy  Norene Fierro  58  22
2019-08-01  B

# Talent CSV Files 

In [68]:
def get_csv_files(bucket, prefix):
    all_files = list_all_objects(bucket, prefix)
    talent_csv_files = [file for file in all_files if file.endswith('.csv')]
    return talent_csv_files

In [69]:
def show_csv_content(bucket, file_key):
    # Retrieve the CSV file from S3
    response = s3.get_object(Bucket=bucket, Key=file_key)
    # Read the CSV file content
    csv_content = pd.read_csv(response['Body'])
    
    # Convert 'dob' column to datetime
    csv_content['dob'] = pd.to_datetime(csv_content['dob'], format='%d/%m/%Y', errors='coerce')
    
    # Convert 'month' column to datetime
    csv_content['month'] = pd.to_datetime(csv_content['month'], format='%B %Y', errors='coerce')
    
    # Format 'invited_date' with the correct ordinal suffix
    def ordinal_suffix(day):
        if 10 <= day % 100 <= 20:
            suffix = 'th'
        else:
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')
        return str(day) + suffix

    csv_content['invited_date'] = csv_content['invited_date'].dropna().astype(int).apply(ordinal_suffix)
    
    print(csv_content)


bucket_name = 'data-402-final-project'
prefix = 'Talent/'

# Get all CSV files
talent_csv_files = get_csv_files(bucket_name, prefix)

In [70]:
# Print the list of CSV files
print("CSV files found:", talent_csv_files)

CSV files found: ['Talent/April2019Applicants.csv', 'Talent/Aug2019Applicants.csv', 'Talent/Dec2019Applicants.csv', 'Talent/Feb2019Applicants.csv', 'Talent/Jan2019Applicants.csv', 'Talent/July2019Applicants.csv', 'Talent/June2019Applicants.csv', 'Talent/March2019Applicants.csv', 'Talent/May2019Applicants.csv', 'Talent/Nov2019Applicants.csv', 'Talent/Oct2019Applicants.csv', 'Talent/Sept2019Applicants.csv']


In [71]:
# Show the content of one CSV file
if talent_csv_files:
    print("\nContents of the first CSV file:")
    show_csv_content(bucket_name, talent_csv_files[0])
else:
    print("No CSV files found.")


Contents of the first CSV file:
      id                     name  gender        dob                               email        city                       address postcode        phone_number                                                uni degree invited_date      month        invited_by
0      1           Esme Trusslove  Female 1994-08-04               etrusslove0@google.es     Swindon          22056 Lerdahl Avenue      SN1    +44-295-783-0228  Saint George's Hospital Medical School, Univer...    2:1         10th 2019-04-01   Bruno Bellbrook
1      2          Matthaeus Audas    Male        NaT                maudas1@mapquest.com    Charlton              263 Nelson Trail     OX12    +44-957-728-0155                                   Keele University    2:1         30th 2019-04-01    Doris Bellasis
2      3          Cherey Tollfree  Female 1992-12-08             ctollfree2@netvibes.com      Weston              69 Coleman Court     GU32    +44-588-749-6002        King's College Londo

# Merged Data Frame 

In [80]:
# Display columns for academy_csv_files
print("Columns in academy_csv_files:")
print(academy_csv_files.columns.tolist())

# Display columns for talent_json_files
print("\nColumns in talent_json_files:")
print(talent_json_files.columns.tolist())

# Display columns for each DataFrame in talent_csv_files
print("\nColumns in talent_csv_files:")
for i, df in enumerate(talent_csv_files, start=1):
    print(f"Columns in talent_csv_files[{i}]:")
    print(df.columns.tolist())

# Display columns for talent_txt_files
print("\nColumns in talent_txt_files:")
print(talent_txt_files.columns.tolist())


Columns in academy_csv_files:
['name', 'trainer', 'Analytic_W1', 'Independent_W1', 'Determined_W1', 'Professional_W1', 'Studious_W1', 'Imaginative_W1', 'Analytic_W2', 'Independent_W2', 'Determined_W2', 'Professional_W2', 'Studious_W2', 'Imaginative_W2', 'Analytic_W3', 'Independent_W3', 'Determined_W3', 'Professional_W3', 'Studious_W3', 'Imaginative_W3', 'Analytic_W4', 'Independent_W4', 'Determined_W4', 'Professional_W4', 'Studious_W4', 'Imaginative_W4', 'Analytic_W5', 'Independent_W5', 'Determined_W5', 'Professional_W5', 'Studious_W5', 'Imaginative_W5', 'Analytic_W6', 'Independent_W6', 'Determined_W6', 'Professional_W6', 'Studious_W6', 'Imaginative_W6', 'Analytic_W7', 'Independent_W7', 'Determined_W7', 'Professional_W7', 'Studious_W7', 'Imaginative_W7', 'Analytic_W8', 'Independent_W8', 'Determined_W8', 'Professional_W8', 'Studious_W8', 'Imaginative_W8']

Columns in talent_json_files:
['name', 'date', 'strengths', 'weaknesses', 'self_development', 'geo_flex', 'financial_support_self', '

In [79]:
for df in talent_csv_files:
    print(type(df))

def read_csv_files(bucket, files):
    dfs = []
    for file in files:
        response = s3.get_object(Bucket=bucket, Key=file)
        df = pd.read_csv(response['Body'])
        dfs.append(df)
    return dfs

talent_csv_files = read_csv_files(bucket_name, csv_files)

# Merge academy_csv_files with talent_json_files based on the "name" column
merged_df = pd.merge(academy_csv_files, talent_json_files, on='name', how='outer', suffixes=('_academy', '_json'))

for df in talent_csv_files:
    merged_df = pd.merge(merged_df, df, on='name', how='outer')

# Merge talent_txt_files with the merged DataFrame based on the "name" column
merged_df = pd.merge(merged_df, talent_txt_files, on='name', how='outer', suffixes=('_merged', '_talent_txt'))



<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pand

MergeError: Passing 'suffixes' which cause duplicate columns {'Professional_W8_x', 'Determined_W2_x', 'Analytic_W6_x', 'Professional_W2_x', 'Determined_W1_x', 'Professional_W6_x', 'Imaginative_W3_x', 'Analytic_W1_x', 'Independent_W3_x', 'Professional_W3_x', 'Determined_W3_x', 'trainer_x', 'Analytic_W3_x', 'Studious_W3_x', 'Determined_W7_x', 'Analytic_W2_x', 'Independent_W4_x', 'Imaginative_W4_x', 'Studious_W6_x', 'Determined_W4_x', 'Professional_W1_x', 'Studious_W1_x', 'Determined_W6_x', 'Independent_W8_x', 'Studious_W7_x', 'Determined_W8_x', 'Independent_W6_x', 'Independent_W7_x', 'Independent_W5_x', 'Determined_W5_x', 'Imaginative_W6_x', 'Professional_W7_x', 'Analytic_W4_x', 'Analytic_W5_x', 'Analytic_W8_x', 'Professional_W4_x', 'Analytic_W7_x', 'Independent_W2_x', 'Imaginative_W5_x', 'Studious_W8_x', 'Studious_W4_x', 'Independent_W1_x', 'Imaginative_W8_x', 'Studious_W5_x', 'Imaginative_W1_x', 'Imaginative_W2_x', 'Imaginative_W7_x', 'Professional_W5_x', 'Studious_W2_x'} is not allowed.