In [285]:
!pip install boto3



In [286]:
import boto3
import pandas as pd
import json
import csv

In [287]:
s3 = boto3.client('s3')

In [288]:
def list_objects(bucket, prefix):
    response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
    if 'Contents' in response:
        return [obj['Key'] for obj in response['Contents'] if obj['Key'] != prefix]
    return []

In [289]:
def list_all_objects(bucket, prefix):
    all_objects = []
    continuation_token = None
    
    while True:
        if continuation_token:
            response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=continuation_token)
        else:
            response = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        
        if 'Contents' in response:
            all_objects.extend([obj['Key'] for obj in response['Contents']])
        
        if not response.get('NextContinuationToken'):
            break
        
        continuation_token = response['NextContinuationToken']
    
    return all_objects

In [290]:
def load_academy_data(bucket, prefix):
    files = list_all_objects(bucket, prefix)
    data_frames = []
    for file_key in files:
        obj = s3.get_object(Bucket=bucket, Key=file_key)
        df = pd.read_csv(obj['Body'])
        data_frames.append(df)
    return pd.concat(data_frames, ignore_index=True)

In [291]:
def load_talent_data(bucket, prefix):
    files = list_all_objects(bucket, prefix)
    records = []
    for file_key in files:
        obj = s3.get_object(Bucket=bucket, Key=file_key)
        content = obj['Body'].read().decode('utf-8')
        try:
            # Attempt to load the content as JSON
            data = json.loads(content)
            records.append(data)
        except json.JSONDecodeError:
            # If content is not JSON, try to parse as CSV
            try:
                csv_reader = csv.DictReader(content.splitlines())
                # Convert CSV rows to dicts & append to records list
                for row in csv_reader:
                    records.append(row)
            except Exception as e:
                print(f"Error loading file {file_key}: {e}")
                # Handle the error appropriately,-> skip the file or handle it differently
    return pd.DataFrame(records)


In [292]:
academy_data = load_academy_data('data-402-final-project', 'Academy/')
talent_data = load_talent_data('data-402-final-project', 'Talent/')

In [293]:
# Extract unique names from academy_data and talent_data
academy_names = set(academy_data['name'])
talent_names = set(talent_data['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [294]:
print("Dimensions of Academy Data:")
print(f"Number of rows: {academy_data.shape[0]}")
print(f"Number of columns: {academy_data.shape[1]}")

Dimensions of Academy Data:
Number of rows: 397
Number of columns: 62


In [295]:
print("\nDimensions of Talent Data:")
print(f"Number of rows: {talent_data.shape[0]}")
print(f"Number of columns: {talent_data.shape[1]}")


Dimensions of Talent Data:
Number of rows: 12082
Number of columns: 176


In [296]:
print("Academy Data:")
print(academy_data.head())

Academy Data:
              name       trainer  Analytic_W1  Independent_W1  Determined_W1  \
0  Quintus Penella  Gregor Gomez            1               2              2   
1     Simon Murrey  Gregor Gomez            6               1              1   
2      Gustaf Lude  Gregor Gomez            6               4              1   
3    Yolanda Fosse  Gregor Gomez            2               1              2   
4     Lynnett Swin  Gregor Gomez            2               2              4   

   Professional_W1  Studious_W1  Imaginative_W1  Analytic_W2  Independent_W2  \
0                1            2               2          NaN             NaN   
1                2            4               2          3.0             1.0   
2                1            2               3          1.0             1.0   
3                3            3               3          4.0             2.0   
4                5            1               2          3.0             2.0   

   ...  Determined_W9  P

In [297]:
print("\nTalent Data:")
print(talent_data.head())


Talent Data:
                name        date  \
0  Stillmann Castano  22/08/2019   
1    Hilary Willmore  01/08/2019   
2      Efrem Whipple  22/08/2019   
3        Sydel Fenne  28/08/2019   
4    Michel Lebarree  07/08/2019   

                                     tech_self_score  \
0      {'C#': 6, 'Java': 5, 'R': 2, 'JavaScript': 2}   
1        {'Python': 1, 'C#': 4, 'Java': 2, 'C++': 4}   
2                              {'Ruby': 4, 'C++': 4}   
3                             {'Java': 3, 'SPSS': 4}   
4  {'Python': 3, 'Java': 4, 'Ruby': 1, 'R': 2, 'P...   

                             strengths                            weaknesses  \
0                           [Charisma]  [Distracted, Impulsive, Introverted]   
1  [Patient, Curious, Problem Solving]    [Overbearing, Chatty, Indifferent]   
2    [Courteous, Independent, Patient]     [Introverted, Impulsive, Anxious]   
3                         [Passionate]            [Perfectionist, Sensitive]   
4                          [Vers

In [298]:
print("\nMissing values in Academy Data:")
print(academy_data.isnull().sum())



Missing values in Academy Data:
name                  0
trainer               0
Analytic_W1           0
Independent_W1        0
Determined_W1         0
                   ... 
Independent_W10     235
Determined_W10      235
Professional_W10    235
Studious_W10        235
Imaginative_W10     235
Length: 62, dtype: int64


In [299]:
academy_data_filled = academy_data.fillna(0)

In [300]:
print("\nMissing values in Talent Data:")
print(talent_data.isnull().sum())


Missing values in Talent Data:
name                         4286
date                         8977
tech_self_score              9032
strengths                    8977
weaknesses                   8977
                            ...  
Tuesday 9 April 2019        12047
Wednesday 9 January 2019    12060
Tuesday 9 July 2019         12042
Thursday 9 May 2019         12057
Wednesday 9 October 2019    12061
Length: 176, dtype: int64


In [301]:
talent_data_filled = talent_data.fillna({
    'tech_self_score': '{}', 
    'strengths': '[]', 
    'weaknesses': '[]',
    'self_development': 'No', 
    'geo_flex': 'No', 
    'financial_support_self': 'No', 
    'course_interest': 'None'
})

In [302]:
# Extract unique names from academy_data and talent_data
academy_names = set(academy_data_filled
['name'])
talent_names = set(talent_data_filled
['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [303]:
# Convert dicts and lists to strings in specified columns
talent_data_filled['tech_self_score'] = talent_data_filled['tech_self_score'].apply(json.dumps)
talent_data_filled['strengths'] = talent_data_filled['strengths'].apply(json.dumps)
talent_data_filled['weaknesses'] = talent_data_filled['weaknesses'].apply(json.dumps)

In [304]:
############ THIS IS A TESTTTR!!!!!!!!!!

# Extract unique names from academy_data and talent_data
academy_names = set(academy_data_filled['name'])
talent_names = set(talent_data_filled['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [305]:
# Convert back

import ast

talent_data_filled['tech_self_score'] = talent_data_filled['tech_self_score'].apply(ast.literal_eval)
talent_data_filled['strengths'] = talent_data_filled['strengths'].apply(ast.literal_eval)
talent_data_filled['weaknesses'] = talent_data_filled['weaknesses'].apply(ast.literal_eval)

print("Data types after converting JSON strings back to lists/dictionaries:")
print(talent_data_filled.dtypes)

Data types after converting JSON strings back to lists/dictionaries:
name                        object
date                        object
tech_self_score             object
strengths                   object
weaknesses                  object
                             ...  
Tuesday 9 April 2019        object
Wednesday 9 January 2019    object
Tuesday 9 July 2019         object
Thursday 9 May 2019         object
Wednesday 9 October 2019    object
Length: 176, dtype: object


In [306]:
############ THIS IS A TESTTTR!!!!!!!!!!

# Extract unique names from academy_data and talent_data
academy_names = set(academy_data_filled['name'])
talent_names = set(talent_data_filled['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [307]:
academy_data_cleaned = academy_data_filled.drop_duplicates()

In [308]:
############ THIS IS A TESTTTR!!!!!!!!!!

# Extract unique names from academy_data and talent_data
academy_names = set(academy_data_cleaned['name'])
talent_names = set(talent_data_filled['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [309]:
print(talent_data_filled.dtypes) ##

name                        object
date                        object
tech_self_score             object
strengths                   object
weaknesses                  object
                             ...  
Tuesday 9 April 2019        object
Wednesday 9 January 2019    object
Tuesday 9 July 2019         object
Thursday 9 May 2019         object
Wednesday 9 October 2019    object
Length: 176, dtype: object


In [310]:
for column in talent_data_filled.columns:
    print(f"Unique values in column '{column}':")
    try:
        print(talent_data_filled[column].unique())
    except Exception as e:
        print(f"Error occurred while processing column '{column}': {e}")
    print()


Unique values in column 'name':
['Stillmann Castano' 'Hilary Willmore' 'Efrem Whipple' ...
 'Vivianna Letty' 'Mercie Groger' nan]

Unique values in column 'date':
['22/08/2019' '01/08/2019' '28/08/2019' '07/08/2019' '14/08/2019'
 '29/08/2019' '21/08/2019' '08/08/2019' '15/08/2019' '06/08/2019'
 '13/08/2019' '27/08/2019' '20/08/2019' '13//08/2019' '28//08/2019'
 '18/07/2019' '31/07/2019' '10/07/2019' '23/07/2019' '09/07/2019'
 '16/07/2019' '17/07/2019' '24/07/2019' '25/07/2019' '11/07/2019'
 '02/07/2019' '03/07/2019' '30/07/2019' '04/07/2019' '11//07/2019'
 '17//07/2019' '12/07/2019' '20/07/2019' '06/07/2019' '27/07/2019'
 '05/07/2019' '13/07/2019' '26/07/2019' '19/07/2019' '05//07/2019'
 '25//07/2019' '13//07/2019' '05//12/2019' '10/12/2019' '17/12/2019'
 '18/12/2019' '05/12/2019' '11/12/2019' '04/12/2019' '12/12/2019'
 '19/12/2019' '11//12/2019' '03/12/2019' '18//12/2019' '28/02/2019'
 '13/02/2019' '19/02/2019' '06/02/2019' '12/02/2019' '27/02/2019'
 '20/02/2019' '07/02/2019' '14/02/2

In [311]:
print("Before removing duplicates - 'name' column:")
print(talent_data_filled['name'])

Before removing duplicates - 'name' column:
0        Stillmann Castano
1          Hilary Willmore
2            Efrem Whipple
3              Sydel Fenne
4          Michel Lebarree
               ...        
12077                  NaN
12078                  NaN
12079                  NaN
12080                  NaN
12081                  NaN
Name: name, Length: 12082, dtype: object


In [312]:
# Exclude the 'None' column from processing
#columns_to_convert = [col for col in talent_data_filled.columns if col != 'None']
columns_to_convert = [col for col in talent_data_filled.columns if col != 'name']


try:
    # Convert dicts and lists to strings in specified columns
    for column in columns_to_convert:
        talent_data_filled[column] = talent_data_filled[column].apply(json.dumps)

    # Remove duplicates
    print("Removing duplicate rows in Talent Data...")
    talent_data_filled.drop_duplicates(inplace=True)
    print("Duplicate rows removed successfully!")

except Exception as e:
    print(f"Error occurred while removing duplicates: {e}")

Removing duplicate rows in Talent Data...
Duplicate rows removed successfully!


In [313]:
print("After removing duplicates - 'name' column:")
print(talent_data_filled['name'])

After removing duplicates - 'name' column:
0        Stillmann Castano
1          Hilary Willmore
2            Efrem Whipple
3              Sydel Fenne
4          Michel Lebarree
               ...        
12077                  NaN
12078                  NaN
12079                  NaN
12080                  NaN
12081                  NaN
Name: name, Length: 12050, dtype: object


In [314]:
############ THIS IS A TESTTTR!!!!!!!!!!

# Extract unique names from academy_data and talent_data
academy_names = set(academy_data_cleaned['name'])
talent_names = set(talent_data_filled['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [315]:
talent_data_cleaned = talent_data_filled.drop_duplicates()

In [316]:
############ THIS IS A TESTTTR!!!!!!!!!!

# Extract unique names from academy_data and talent_data
academy_names = set(academy_data_cleaned['name'])
talent_names = set(talent_data_cleaned['name'])

# Check if each name in academy_data exists in talent_data
common_names = [name for name in academy_names if name in talent_names]

print("Number of names from academy_data found in talent_data:", len(common_names))

Number of names from academy_data found in talent_data: 397


In [317]:
print("\nSummary statistics for Academy Data:")
print(academy_data_cleaned.describe())



Summary statistics for Academy Data:
       Analytic_W1  Independent_W1  Determined_W1  Professional_W1  \
count   397.000000      397.000000     397.000000       397.000000   
mean      3.120907        3.173804       2.992443         2.957179   
std       1.696845        1.597826       1.548033         1.605131   
min       1.000000        1.000000       1.000000         1.000000   
25%       2.000000        2.000000       2.000000         2.000000   
50%       3.000000        3.000000       3.000000         3.000000   
75%       4.000000        4.000000       4.000000         4.000000   
max       8.000000        8.000000       8.000000         8.000000   

       Studious_W1  Imaginative_W1  Analytic_W2  Independent_W2  \
count   397.000000      397.000000   397.000000      397.000000   
mean      2.921914        3.128463     2.899244        2.962217   
std       1.491203        1.625591     1.642222        1.764144   
min       1.000000        1.000000     0.000000        0.000000

In [318]:
print("\nSummary statistics for Talent Data:")
print(talent_data_cleaned.describe(include='all'))


Summary statistics for Talent Data:
                     name   date tech_self_score strengths weaknesses  \
count                7764  12050           12050     12050      12050   
unique               4836    173            2573      1532       1447   
top     Shurlocke Cringle    NaN            "{}"      "[]"       "[]"   
freq                    3   8977            9031      8977       8977   

       self_development geo_flex financial_support_self result  \
count             12050    12050                  12050  12050   
unique                2        2                      2      3   
top                "No"     "No"                   "No"    NaN   
freq               9278     9299                   9316   8977   

       course_interest  ... Thursday 7 November 2019 Thursday 8 August 2019  \
count            12050  ...                    12050                  12050   
unique               4  ...                       31                     36   
top             "None"  ...  

In [319]:
print("\nColumn types in Academy Data:")
print(academy_data_cleaned.dtypes)


Column types in Academy Data:
name                 object
trainer              object
Analytic_W1           int64
Independent_W1        int64
Determined_W1         int64
                     ...   
Independent_W10     float64
Determined_W10      float64
Professional_W10    float64
Studious_W10        float64
Imaginative_W10     float64
Length: 62, dtype: object


In [320]:
print("\nColumn types in Talent Data:")
print(talent_data_cleaned.dtypes)


Column types in Talent Data:
name                        object
date                        object
tech_self_score             object
strengths                   object
weaknesses                  object
                             ...  
Tuesday 9 April 2019        object
Wednesday 9 January 2019    object
Tuesday 9 July 2019         object
Thursday 9 May 2019         object
Wednesday 9 October 2019    object
Length: 176, dtype: object


In [321]:
# Convert specific columns to appropriate data types
academy_data_cleaned = academy_data_cleaned.convert_dtypes()
talent_data_cleaned = talent_data_cleaned.convert_dtypes()

print("Data types after conversion:")
print(academy_data_cleaned.dtypes)
print(talent_data_cleaned.dtypes)

Data types after conversion:
name                string[python]
trainer             string[python]
Analytic_W1                  Int64
Independent_W1               Int64
Determined_W1                Int64
                         ...      
Independent_W10              Int64
Determined_W10               Int64
Professional_W10             Int64
Studious_W10                 Int64
Imaginative_W10              Int64
Length: 62, dtype: object
name                        string[python]
date                        string[python]
tech_self_score             string[python]
strengths                   string[python]
weaknesses                  string[python]
                                 ...      
Tuesday 9 April 2019        string[python]
Wednesday 9 January 2019    string[python]
Tuesday 9 July 2019         string[python]
Thursday 9 May 2019         string[python]
Wednesday 9 October 2019    string[python]
Length: 176, dtype: object


In [322]:
# Standardise column names
academy_data_cleaned.columns = academy_data_cleaned.columns.str.lower().str.replace(' ', '_')
talent_data_cleaned.columns = talent_data_cleaned.columns.str.lower().str.replace(' ', '_')

In [323]:
print("Dimensions of Academy Data:")
print(f"Number of rows: {academy_data.shape[0]}")
print(f"Number of columns: {academy_data.shape[1]}")


#Dimensions of Academy Data:
#Number of rows: 397
#Number of columns: 62




Dimensions of Academy Data:
Number of rows: 397
Number of columns: 62


In [324]:
print("Dimensions of Talent Data:")
print(f"Number of rows: {talent_data.shape[0]}")
print(f"Number of columns: {talent_data.shape[1]}")


#Dimensions of Talent Data:
#Number of rows: 12082
#Number of columns: 176


Dimensions of Talent Data:
Number of rows: 12082
Number of columns: 176


In [325]:
# Check for anomalies or outliers 
print(academy_data_cleaned.describe())
print(talent_data_cleaned.describe())

# Check for invalid entries in categorical columns
print(academy_data_cleaned['name'].value_counts())
print(talent_data_cleaned['name'].value_counts())


       analytic_w1  independent_w1  determined_w1  professional_w1  \
count        397.0           397.0          397.0            397.0   
mean      3.120907        3.173804       2.992443         2.957179   
std       1.696845        1.597826       1.548033         1.605131   
min            1.0             1.0            1.0              1.0   
25%            2.0             2.0            2.0              2.0   
50%            3.0             3.0            3.0              3.0   
75%            4.0             4.0            4.0              4.0   
max            8.0             8.0            8.0              8.0   

       studious_w1  imaginative_w1  analytic_w2  independent_w2  \
count        397.0           397.0        397.0           397.0   
mean      2.921914        3.128463     2.899244        2.962217   
std       1.491203        1.625591     1.642222        1.764144   
min            1.0             1.0          0.0             0.0   
25%            2.0             2.0

In [326]:
# List all column names 
print("Column names in Academy Data:")
print(academy_data_cleaned.columns.tolist())


print("\nColumn names in Talent Data:")
print(talent_data_cleaned.columns.tolist())


Column names in Academy Data:
['name', 'trainer', 'analytic_w1', 'independent_w1', 'determined_w1', 'professional_w1', 'studious_w1', 'imaginative_w1', 'analytic_w2', 'independent_w2', 'determined_w2', 'professional_w2', 'studious_w2', 'imaginative_w2', 'analytic_w3', 'independent_w3', 'determined_w3', 'professional_w3', 'studious_w3', 'imaginative_w3', 'analytic_w4', 'independent_w4', 'determined_w4', 'professional_w4', 'studious_w4', 'imaginative_w4', 'analytic_w5', 'independent_w5', 'determined_w5', 'professional_w5', 'studious_w5', 'imaginative_w5', 'analytic_w6', 'independent_w6', 'determined_w6', 'professional_w6', 'studious_w6', 'imaginative_w6', 'analytic_w7', 'independent_w7', 'determined_w7', 'professional_w7', 'studious_w7', 'imaginative_w7', 'analytic_w8', 'independent_w8', 'determined_w8', 'professional_w8', 'studious_w8', 'imaginative_w8', 'analytic_w9', 'independent_w9', 'determined_w9', 'professional_w9', 'studious_w9', 'imaginative_w9', 'analytic_w10', 'independent_w10

In [327]:
# Check for common columns
academy_columns = set(academy_data_cleaned.columns)
talent_columns = set(talent_data_cleaned.columns)

common_columns = academy_columns.intersection(talent_columns)
print("\nCommon columns between Academy Data and Talent Data:")
print(common_columns)


Common columns between Academy Data and Talent Data:
{'name'}


In [329]:
# Check the format and data type of the "name" column in academy_data_cleaned dataframe
print("Format and data type of 'name' column in academy_data_cleaned:")
print(academy_data_cleaned['name'].head())  # Display the first few entries of the column
print("Data type:", academy_data_cleaned['name'].dtype)  # Display the data type of the column

# Check the format and data type of the "name" column in talent_data_cleaned dataframe
print("\nFormat and data type of 'name' column in talent_data_cleaned:")
print(talent_data_cleaned['name'].head())  # Display the first few entries of the column
print("Data type:", talent_data_cleaned['name'].dtype)  # Display the data type of the column


Format and data type of 'name' column in academy_data_cleaned:
0    Quintus Penella
1       Simon Murrey
2        Gustaf Lude
3      Yolanda Fosse
4       Lynnett Swin
Name: name, dtype: string
Data type: string

Format and data type of 'name' column in talent_data_cleaned:
0    Stillmann Castano
1      Hilary Willmore
2        Efrem Whipple
3          Sydel Fenne
4      Michel Lebarree
Name: name, dtype: string
Data type: string


In [330]:
# Check how many name values from academy_data_cleaned are in talent_data_cleaned name column
common_names_count = academy_data_cleaned['name'].isin(talent_data_cleaned['name']).sum()

print(f"Number of names from academy_data_cleaned found in talent_data_cleaned: {common_names_count}")


Number of names from academy_data_cleaned found in talent_data_cleaned: 397


In [328]:
# Merge datasets based on the "name" column
merged_data = pd.merge(academy_data_cleaned, talent_data_cleaned, on='name', how='inner')

# Display the merged data
print("Merged Data:")
print(merged_data.head())

Merged Data:
              name       trainer  analytic_w1  independent_w1  determined_w1  \
0  Quintus Penella  Gregor Gomez            1               2              2   
1  Quintus Penella  Gregor Gomez            1               2              2   
2     Simon Murrey  Gregor Gomez            6               1              1   
3     Simon Murrey  Gregor Gomez            6               1              1   
4      Gustaf Lude  Gregor Gomez            6               4              1   

   professional_w1  studious_w1  imaginative_w1  analytic_w2  independent_w2  \
0                1            2               2            0               0   
1                1            2               2            0               0   
2                2            4               2            3               1   
3                2            4               2            3               1   
4                1            2               3            1               1   

   ...  thursday_7_novemb

In [333]:
# Test search for a user
simon_murrey_records = merged_data[merged_data['name'] == 'Simon Murrey']
simon_murrey_records_all_columns = merged_data.loc[merged_data['name'] == 'Simon Murrey']
# Filtered records for Simon Murrey
print("Filtered records for Simon Murrey:")
print(simon_murrey_records)

# All columns for Simon Murrey
print("\nAll columns for Simon Murrey:")
print(simon_murrey_records_all_columns)


Filtered records for Simon Murrey:
           name       trainer  analytic_w1  independent_w1  determined_w1  \
2  Simon Murrey  Gregor Gomez            6               1              1   
3  Simon Murrey  Gregor Gomez            6               1              1   

   professional_w1  studious_w1  imaginative_w1  analytic_w2  independent_w2  \
2                2            4               2            3               1   
3                2            4               2            3               1   

   ...  thursday_7_november_2019  thursday_8_august_2019  \
2  ...                       NaN                     NaN   
3  ...                       NaN                     NaN   

   tuesday_8_january_2019  wednesday_8_may_2019  tuesday_8_october_2019  \
2                     NaN                   NaN                     NaN   
3                     NaN                   NaN                     NaN   

   tuesday_9_april_2019  wednesday_9_january_2019  tuesday_9_july_2019  \
2          

In [336]:
# Filter the dataframe to only include records for Simon Murrey
simon_murrey_records = merged_data[merged_data['name'] == 'Simon Murrey']

# Check for duplicate rows
duplicate_rows = simon_murrey_records[simon_murrey_records.duplicated()]

if duplicate_rows.empty:
    print("No duplicate records found for Simon Murrey.")
else:
    print("Duplicate records found for Simon Murrey.")
    print(duplicate_rows)


No duplicate records found for Simon Murrey.
