In [20]:
import csv

def txt_to_csv(txt_file, csv_file):
  """Converts a TXT file to a CSV file with pipe delimiter.

  Args:
    txt_file: Path to the input TXT file.
    csv_file: Path to the output CSV file.
  """

  with open(txt_file, 'r') as f_in, open(csv_file, 'w', newline='') as f_out:
    csv_writer = csv.writer(f_out, delimiter='|')
    for line in f_in:
      csv_writer.writerow(line.strip().split('|'))

# Example usage:
txt_file = 'chess.txt'
csv_file = 'chess.csv'
txt_to_csv(txt_file, csv_file)

In [21]:
import pandas as pd
import re

In [22]:
# Open the input file in read mode and the output file in write mode
with open("tournament.txt", "r") as infile, open("tournament.csv", "w") as outfile:
    for line in infile:
        # Check if the line contains the separator; if not, replace pipes with commas and write it
        if "-----------------------------------------------------------------------------------------" not in line:
            comma_separated_line = line.replace("|", ",")
            outfile.write(comma_separated_line)

print("Conversion complete. The data has been saved in tournament.csv without separators.")

Conversion complete. The data has been saved in tournament.csv without separators.


In [23]:
import pandas as pd
import re

# Step 1: Load the file and inspect the contents
with open('tournament.txt', 'r') as file:
    data = file.readlines()

# Step 2: Parse data
players = []
for line in data:
    # Use regex to extract relevant parts or split based on known patterns
    # For example:
    match = re.match(r"regex-pattern", line)
    if match:
        player_data = {
            "Name": match.group(1),
            "State": match.group(2),
            "Points": float(match.group(3)),
            "Pre-Rating": int(match.group(4)),
            "Opponents_Ratings": [int(x) for x in match.group(5).split()]
        }
        players.append(player_data)

# Step 3: Calculate the average opponent rating for each player
for player in players:
    player['Average_Opponent_Rating'] = sum(player['Opponents_Ratings']) / len(player['Opponents_Ratings'])

# Step 4: Convert to DataFrame
df = pd.DataFrame(players)

# Step 5: Export to CSV
df.to_csv("chess_tournament_results.csv", index=False)

In [24]:
import csv
import re

# Sample input data (shortened for example purposes)
players_data = [
    {
        "name": "GARY HUA",
        "state": "ON",
        "total_points": 6.0,
        "pre_rating": 1794,
        "opponent_ratings": [1632, 1576, 1482, 1522, 1602, 1620, 1578],  # Example opponent ratings
    },
    {
        "name": "DAKSHESH DARURI",
        "state": "MI",
        "total_points": 6.0,
        "pre_rating": 1553,
        "opponent_ratings": [1480, 1532, 1630, 1450, 1500, 1610, 1470],
    },
    # More players can be added similarly
]

# Function to compute average rating of opponents
def calculate_average_opponent_rating(opponent_ratings):
    return sum(opponent_ratings) / len(opponent_ratings) if opponent_ratings else 0

# Create CSV file
with open('chess_players.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Player's Name", "Player's State", "Total Number of Points", "Player's Pre-Rating", "Average Pre Tournament Chess Rating of Opponents"])

    for player in players_data:
        average_opponent_rating = calculate_average_opponent_rating(player["opponent_ratings"])
        writer.writerow([player["name"], player["state"], player["total_points"], player["pre_rating"], average_opponent_rating])

print("CSV file 'chess_players.csv' has been created.")

CSV file 'chess_players.csv' has been created.


In [25]:
print(df.columns)

RangeIndex(start=0, stop=0, step=1)


In [26]:
data.columns = data.columns.str.strip()  # This removes any leading/trailing whitespace
print(data.columns)  # Check again to confirm

AttributeError: 'list' object has no attribute 'columns'

In [None]:
print([repr(col) for col in data.columns])

["'Pair'", "'Player Name'", "'Total'", "'Round'", "'Round.1'", "'Round.2'", "'Round.3'", "'Round.4'", "'Round.5'", "'Round.6'", "''"]


In [None]:
try:
    data[['USCF ID', 'Rating Info']] = data['USCF ID / Rtg (Pre->Post)'].str.split(' / ', expand=True)
except KeyError as e:
    print(f"Column not found: {e}")

Column not found: 'USCF ID / Rtg (Pre->Post)'


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
data = pd.read_csv('tournament_data.csv')

# Print the column names to identify any discrepancies
print("Available columns:", data.columns)

# Attempt to split the 'USCF ID / Rtg (Pre->Post)' column into separate columns for 'ID', 'Pre Rating', 'Post Rating'
try:
    data[['USCF ID', 'Rating Info']] = data['USCF ID / Rtg (Pre->Post)'].str.split(' / ', expand=True)
    data[['Pre Rating', 'Post Rating']] = data['Rating Info'].str.split('->', expand=True)

    # Convert ratings to integers
    data['Pre Rating'] = data['Pre Rating'].astype(int)
    data['Post Rating'] = data['Post Rating'].astype(int)

    # Split the results columns and clean them
    round_columns = [f'Round {i}' for i in range(1, 8)]
    for i, col in enumerate(round_columns):
        data[col] = data[col].apply(lambda x: x.split(' ')[0])  # Take only the result (W, L, D)

    # Total points calculation (assuming each W=1, D=0.5, L=0)
    def calculate_points(row):
        points = 0
        for i in round_columns:
            if row[i] == 'W':
                points += 1
            elif row[i] == 'D':
                points += 0.5
        return points

    data['Total Points'] = data.apply(calculate_points, axis=1)

    # Calculate rating change
    data['Rating Change'] = data['Post Rating'] - data['Pre Rating']

    # Summary statistics
    summary = data[['Player Name', 'Total Points', 'Pre Rating', 'Post Rating', 'Rating Change']]

    # Show the summary table
    print(summary)

    # Visualizing total points
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Player Name', y='Total Points', data=summary.sort_values(by='Total Points', ascending=False))
    plt.xticks(rotation=90)
    plt.title('Player Performance by Total Points')
    plt.xlabel('Player Name')
    plt.ylabel('Total Points')
    plt.show()

    # Visualizing Rating Changes
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Player Name', y='Rating Change', data=summary.sort_values(by='Rating Change', ascending=False))
    plt.xticks(rotation=90)
    plt.title('Rating Change per Player')
    plt.xlabel('Player Name')
    plt.ylabel('Rating Change')
    plt.show()

except KeyError as e:
    print(f"Column not found: {e}")

Available columns: Index([' Pair ', ' Player Name                     ', 'Total', 'Round',
       'Round.1', 'Round.2', 'Round.3', 'Round.4', 'Round.5', 'Round.6', ' '],
      dtype='object')
Column not found: 'USCF ID / Rtg (Pre->Post)'


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
data = pd.read_csv('tournament_data.csv')

# Clean column names by stripping leading/trailing spaces
data.columns = data.columns.str.strip()

# Print the cleaned column names
print("Cleaned columns:", data.columns)

# Check the first few rows to understand the data structure
print(data.head())

# At this point, check if there is any column related to ratings (Pre/Post Rating).
# If there's no such column, you may need to add it manually or adjust the analysis accordingly.

Cleaned columns: Index(['Pair', 'Player Name', 'Total', 'Round', 'Round.1', 'Round.2',
       'Round.3', 'Round.4', 'Round.5', 'Round.6', ''],
      dtype='object')
     Pair                        Player Name  Total  Round Round.1 Round.2  \
0   Num     USCF ID / Rtg (Pre->Post)          Pts     1       2       3     
1      1    GARY HUA                          6.0    W  39   W  21   W  18   
2     ON    15445895 / R: 1794   ->1817       N:2    W       B       W       
3      2    DAKSHESH DARURI                   6.0    W  63   W  58   L   4   
4     MI    14598900 / R: 1553   ->1663       N:2    B       W       B       

  Round.3 Round.4 Round.5 Round.6       
0     4       5       6       7         
1   W  14   W   7   D  12   D   4  NaN  
2   B       W       B       W      NaN  
3   W  17   W  16   W  20   W   7  NaN  
4   W       B       W       B      NaN  


In [None]:
print(df.columns)

Index([' Pair ', ' Player Name                     ', 'Total', 'Round',
       'Round.1', 'Round.2', 'Round.3', 'Round.4', 'Round.5', 'Round.6', ' '],
      dtype='object')


In [None]:
import pandas as pd

# Load the CSV file
file_path = 'tournament_data.csv'
data = pd.read_csv(file_path)

# Print column names to check for accuracy
print(data.columns)

# Preview the data
print(data.head())


Index([' Pair ', ' Player Name                     ', 'Total', 'Round',
       'Round.1', 'Round.2', 'Round.3', 'Round.4', 'Round.5', 'Round.6', ' '],
      dtype='object')
    Pair    Player Name                       Total  Round Round.1 Round.2  \
0   Num     USCF ID / Rtg (Pre->Post)          Pts     1       2       3     
1      1    GARY HUA                          6.0    W  39   W  21   W  18   
2     ON    15445895 / R: 1794   ->1817       N:2    W       B       W       
3      2    DAKSHESH DARURI                   6.0    W  63   W  58   L   4   
4     MI    14598900 / R: 1553   ->1663       N:2    B       W       B       

  Round.3 Round.4 Round.5 Round.6       
0     4       5       6       7         
1   W  14   W   7   D  12   D   4  NaN  
2   B       W       B       W      NaN  
3   W  17   W  16   W  20   W   7  NaN  
4   W       B       W       B      NaN  


In [None]:
import pandas as pd
import re

# Load the CSV file
file_path = 'tournament_data.csv'
data = pd.read_csv(file_path)

# Print column names to check for accuracy
print(data.columns)

# Preview the first few rows of data
print(data.head())

# Function to remove all NaN values
def remove_nan(df):
    return df.dropna()

# Remove all NaN values from the dataset
data = remove_nan(data)

# Clean up the 'Total' column by stripping extra spaces and handling non-numeric values
data['Total'] = data['Total'].str.strip()
data = data[data['Total'].apply(lambda x: x.isnumeric())]

# Define a function to calculate the average pre-tournament rating of opponents
def calculate_avg_opponent_rating(row):
    opponents = re.findall(r'[WL] (\d+)', row[' Pair '])
    opponent_ratings = []
    for opponent in opponents:
        opponent_row = data.iloc[int(opponent) - 1]
        opponent_pre_rating = int(re.search(r'R: (\d+)', opponent_row[' Player Name                     ']).group(1))
        opponent_ratings.append(opponent_pre_rating)
    if opponent_ratings:
        return sum(opponent_ratings) / len(opponent_ratings)
    return 0

# Process each row in the data
players = []
for index, row in data.iterrows():
    name = row[' Player Name                     '].strip()
    points = float(row['Total'])
    pre_rating = int(re.search(r'R: (\d+)', row[' Player Name                     ']).group(1))
    avg_opponent_rating = calculate_avg_opponent_rating(row)
    players.append([name, points, pre_rating, avg_opponent_rating])

# Create DataFrame and save to CSV
df = pd.DataFrame(players, columns=['Player’s Name', 'Total Number of Points', 'Player’s Pre-Rating', 'Average Pre Tournament Chess Rating of Opponents'])
df.to_csv('processed_tournament_data.csv', index=False)

print(df)


Index([' Pair ', ' Player Name                     ', 'Total', 'Round',
       'Round.1', 'Round.2', 'Round.3', 'Round.4', 'Round.5', 'Round.6', ' '],
      dtype='object')
    Pair    Player Name                       Total  Round Round.1 Round.2  \
0   Num     USCF ID / Rtg (Pre->Post)          Pts     1       2       3     
1      1    GARY HUA                          6.0    W  39   W  21   W  18   
2     ON    15445895 / R: 1794   ->1817       N:2    W       B       W       
3      2    DAKSHESH DARURI                   6.0    W  63   W  58   L   4   
4     MI    14598900 / R: 1553   ->1663       N:2    B       W       B       

  Round.3 Round.4 Round.5 Round.6       
0     4       5       6       7         
1   W  14   W   7   D  12   D   4  NaN  
2   B       W       B       W      NaN  
3   W  17   W  16   W  20   W   7  NaN  
4   W       B       W       B      NaN  
Empty DataFrame
Columns: [Player’s Name, Total Number of Points, Player’s Pre-Rating, Average Pre Tournament Che

In [None]:
import pandas as pd
import re

# Load the CSV file (update with the correct path if necessary)
file_path = 'tournament_data.csv'
data = pd.read_csv(file_path)

# Print column names to check for accuracy
print(data.columns)

# Preview the first few rows of data
print(data.head())

# Function to clean up the rating and remove extra characters after 'R:'
def clean_rating(value):
    # Use regular expression to find the rating after 'R:' and remove any extra characters after it
    if isinstance(value, str):
        match = re.search(r'R:\s*(\d+)', value)
        if match:
            return match.group(1)  # Return only the numeric rating after 'R:'
    return value

# Apply the function to clean up the 'USCF ID / Rtg (Pre->Post)' column
data[' Player Name                     '] = data[' Player Name                     '].apply(clean_rating)

# Clean up any NaN values if necessary
data = data.dropna()

# Save the cleaned data to a new CSV file
data.to_csv('cleaned_tournament_data.csv', index=False)

# Preview the cleaned data
print(data.head())


Index([' Pair ', ' Player Name                     ', 'Total', 'Round',
       'Round.1', 'Round.2', 'Round.3', 'Round.4', 'Round.5', 'Round.6', ' '],
      dtype='object')
    Pair    Player Name                       Total  Round Round.1 Round.2  \
0   Num     USCF ID / Rtg (Pre->Post)          Pts     1       2       3     
1      1    GARY HUA                          6.0    W  39   W  21   W  18   
2     ON    15445895 / R: 1794   ->1817       N:2    W       B       W       
3      2    DAKSHESH DARURI                   6.0    W  63   W  58   L   4   
4     MI    14598900 / R: 1553   ->1663       N:2    B       W       B       

  Round.3 Round.4 Round.5 Round.6       
0     4       5       6       7         
1   W  14   W   7   D  12   D   4  NaN  
2   B       W       B       W      NaN  
3   W  17   W  16   W  20   W   7  NaN  
4   W       B       W       B      NaN  
    Pair    Player Name                       Total  Round Round.1 Round.2  \
0   Num     USCF ID / Rtg (Pre->Po

In [None]:
import requests

# Download the text file
url = "https://raw.githubusercontent.com/ZacharyHerold/chinafundnews/master/tournamentinfo.txt"
response = requests.get(url)
text = response.text

# Split lines, remove empty lines and duplicate of the first line
lines = text.splitlines()
lines = [line.strip() for line in lines]  # Remove leading/trailing whitespaces
lines = list(set(lines[1:]))  # Remove empty lines and duplicate of the first line

# Extract names (assuming two or more consecutive words separated by a space)
names = []
for line in lines:
    words = line.split()
    if len(words) >= 2:
        names.append(" ".join(words))

# Extract states (assuming two uppercase letters)
states = []
for line in lines:
    if len(line) >= 2 and line[:2].isupper():
        states.append(line[:2])

print("Names:", names)
print("States:", states)
print("Number of states:", len(states))

Names: ['8 | EZEKIEL HOUGHTON |5.0 |W 3|W 32|L 14|L 9|W 47|W 28|W 19|', '33 | JADE GE |3.5 |W 60|L 12|W 50|D 36|L 13|L 15|W 51|', 'MI | 15142253 / R: 1641P17->1657P24 |N:3 |B |W |B |W |B |W |W |', '4 | PATRICK H SCHILLING |5.5 |W 23|D 28|W 2|W 26|D 5|W 19|D 1|', '48 | DANIEL KHAIN |2.5 |L 17|W 63|H |D 52|H |L 29|L 35|', '63 | THOMAS JOSEPH HOSMER |1.0 |L 2|L 48|D 49|L 43|L 45|H |U |', 'MI | 15490981 / R: 377P3 ->1076P10 | |B |W |B |W |B |W |W |', '23 | ALAN BUI |4.0 |L 4|W 43|L 20|W 58|L 17|W 37|W 46|', 'MI | 12616049 / R: 1716 ->1744 |N:2 |W |B |W |B |W |B |B |', 'MI | 12405534 / R: 1555 ->1529 |N:4 |W |B |W |B | |W |B |', '59 | SEAN M MC CORMICK |2.0 |L 41|B |L 9|L 40|L 43|W 54|L 44|', 'MI | 14882954 / R: 1507 ->1513 |N:3 |W |W |B |W |B |B |W |', 'ON | 15445895 / R: 1794 ->1817 |N:2 |W |B |W |B |W |B |W |', '36 | SIDDHARTH JHA |3.5 |L 13|W 57|W 51|D 33|H |L 16|D 28|', '6 | HANSEN SONG |5.0 |W 34|D 29|L 11|W 35|D 10|W 27|W 21|', 'ON | 15131520 / R: 1579 ->1564 |N:4 |B |W |B |W |B |W |

In [28]:
# Assuming you have the text data from the tournament info
text_data = """
# Your tournament data here
"""

# Extract names and states from the text data (adjust the extraction logic as needed)
Names = []
States = []
for line in text_data.splitlines():
    if "|" in line:
        name_parts = line.split("|")
        name = name_parts[1].strip()
        state = name_parts[0].strip()  # Assuming state is the first part
        Names.append(name)
        States.append(state)

# Create the DataFrame
player_data = []
for name_str, state in zip(Names, States):
    parts = name_str.split('|')
    player_id, player_name, total_points, *rounds = parts
    player_data.append([player_id.strip(), player_name.strip(), total_points.strip(), *rounds, state])

# Create the DataFrame
df = pd.DataFrame(player_data, columns=['Player ID', 'Player Name', 'Total Points', 'Round 1', 'Round 2', 'Round 3', 'Round 4', 'Round 5', 'Round 6', 'Round 7', 'State'])

print(df.head())

Empty DataFrame
Columns: [Player ID, Player Name, Total Points, Round 1, Round 2, Round 3, Round 4, Round 5, Round 6, Round 7, State]
Index: []
