In [3]:
import pandas as pd
import numpy as np
import re
from sklearn.preprocessing import MinMaxScaler

creating a normalized dataset

In [None]:
# File paths
input_file = "C:\\Users\\Cheig\\OneDrive\\Desktop\\csvs\\data_with_ids.csv" # Change this to your input file path
#input_file = "C:\\Users\\Cheig\\capstone\\dh_stuff\\SHADXWBXRN_analysis_20250331_173555\\data\\dh_data.csv"
output_file = 'processed_dataset2.csv'  # Change this to your desired output file path

# Load the dataset
print(f"Loading dataset from {input_file}...")
df = pd.read_csv(input_file)

# Apply the key_to_number function to convert Estimated_Key to numerical values
print("Converting Estimated_Key to numerical values...")
df['Estimated_Key'] = df['Estimated_Key'].apply(key_to_number)

# Remove Title, Artist, and Album columns
print("Removing Title, Artist, and Album columns...")
columns_to_remove = ['Title', 'Artist', 'Album']
df_processed = df.drop(columns=columns_to_remove)

# Normalize each column (excluding song_id)
print("Normalizing numerical columns...")
scaler = MinMaxScaler()
columns_to_normalize = [col for col in df_processed.columns if col != 'song_id']

# Handle potential NaN values before normalization
for col in columns_to_normalize:
    # Fill NaN values with column mean if numerical
    if pd.api.types.is_numeric_dtype(df_processed[col]):
        mean_val = df_processed[col].mean()
        df_processed[col] = df_processed[col].fillna(mean_val)

# Separate song_id before normalization
song_ids = df_processed['song_id']
df_to_normalize = df_processed.drop(columns=['song_id'])

# Apply normalization only to numeric columns
numeric_columns = df_to_normalize.select_dtypes(include=[np.number]).columns
df_to_normalize[numeric_columns] = scaler.fit_transform(df_to_normalize[numeric_columns])

# Reattach song_id
df_normalized = pd.concat([song_ids, df_to_normalize], axis=1)

# Save the processed dataset to a new file
print(f"Saving processed dataset to {output_file}...")
df_normalized.to_csv(output_file, index=False)

print("Processing complete!")

Loading dataset from C:\Users\Cheig\capstone\dh_stuff\SHADXWBXRN_analysis_20250331_173555\data\dh_data.csv...
Converting Estimated_Key to numerical values...
Removing Title, Artist, and Album columns...
Normalizing numerical columns...
Saving processed dataset to processed_dataset2.csv...
Processing complete!


In [1]:
#Converting the key into a numperical feature 
def key_to_number(key_str):
    """Convert key strings to numerical values.
    Major keys: C=0, C#=1, D=2, ... B=11
    Minor keys: Am=12, A#m=13, Bm=14, ... G#m=23
    
    Handles various notations:
    - Full notation: 'C major', 'D minor'
    - Short notation: 'C', 'Dm'
    - Symbol notation: 'C#', 'F#m'
    """
    if pd.isna(key_str) or key_str == '':
        return np.nan
    
    # Standardize the key string (replace flats with equivalent sharps)
    key_str = key_str.replace('Ab', 'G#').replace('Bb', 'A#').replace('Cb', 'B').replace('Db', 'C#').replace('Eb', 'D#')
    key_str = key_str.strip()
    
    # Define all possible notes
    notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    
    # Case 1: Full notation (e.g., "C major", "D minor")
    full_notation = re.match(r'([A-G][#]?)\s*(major|minor)$', key_str)
    if full_notation:
        note, scale = full_notation.groups()
        note_idx = notes.index(note) if note in notes else -1
        if note_idx >= 0:
            if scale == 'major':
                return note_idx
            else:  # minor
                return note_idx + 3 + 12  # Relative minor is 3 semitones up, then add 12 to differentiate
    
    # Case 2: Short notation with explicit minor (e.g., "Dm", "F#m")
    short_minor = re.match(r'([A-G][#]?)m$', key_str)
    if short_minor:
        note = short_minor.group(1)
        note_idx = notes.index(note) if note in notes else -1
        if note_idx >= 0:
            return note_idx + 3 + 12  # Minor key
    
    # Case 3: Just the note name - assume major (e.g., "C", "F#")
    if key_str in notes:
        return notes.index(key_str)  # Major key
    
    print(f"Could not parse key: {key_str}")
    return np.nan
def main():
    # Read the CSV file
    df = pd.read_csv("C:\\Users\\Cheig\\OneDrive\\Desktop\\csvs\\data_with_ids.csv")  # Replace with your actual file name
    
    # Convert Estimated_Key to numerical values
    df['Estimated_Key_Numeric'] = df['Estimated_Key'].apply(key_to_number)
    
    # Identify columns to process (exclude song_id, Title, Artist, Album)
    exclude_cols = ['song_id', 'Title', 'Artist', 'Album']
    feature_cols = [col for col in df.columns if col not in exclude_cols]
    
    # Create a dictionary to store ordered song IDs for each feature
    ordered_songs_dict = {}
    
    # For each feature, sort songs and extract ordered list of song_ids
    for feature in feature_cols:
        # Skip if the feature is non-numeric
        if df[feature].dtype == 'object' and feature != 'Estimated_Key':
            print(f"Skipping non-numeric feature: {feature}")
            continue
        
        # Use the numeric version for Estimated_Key
        if feature == 'Estimated_Key':
            feature = 'Estimated_Key_Numeric'
        
        # Sort by feature value and extract song_ids
        try:
            # Drop NaN values before sorting
            sorted_df = df.dropna(subset=[feature]).sort_values(by=feature)
            ordered_song_ids = sorted_df['song_id'].tolist()
            
            # Store in dictionary (using original feature name)
            if feature == 'Estimated_Key_Numeric':
                ordered_songs_dict['Estimated_Key'] = ordered_song_ids
            else:
                ordered_songs_dict[feature] = ordered_song_ids
        except Exception as e:
            print(f"Error processing feature {feature}: {e}")
    
    # Create a new DataFrame for the ordered song IDs
    result_df = pd.DataFrame({
        'feature': list(ordered_songs_dict.keys()),
        'ordered_song_ids': list(ordered_songs_dict.values())
    })
    
    # Save to CSV
    result_df.to_csv('ordered_songs_by_feature.csv', index=False)
    print(f"Saved ordered song IDs to ordered_songs_by_feature.csv")
#Create ordered lists of song ids by feature
# def main():
#     # Read the CSV file
#     df = pd.read_csv("C:\\Users\\Cheig\\OneDrive\\Desktop\\csvs\\data_with_ids.csv")  # Replace with your actual file name
    
#     # Convert Estimated_Key to numerical values
#     df['Estimated_Key_Numeric'] = df['Estimated_Key'].apply(key_to_number)
    
#     # Identify columns to process (exclude song_id, Title, Artist, Album)
#     exclude_cols = ['song_id', 'Title', 'Artist', 'Album']
#     feature_cols = [col for col in df.columns if col not in exclude_cols]
    
#     # Create a dictionary to store ordered song IDs and their values for each feature
#     ordered_songs_dict = {}
    
#     # For each feature, sort songs and extract ordered list of song_ids with values
#     for feature in feature_cols:
#         # Skip if the feature is non-numeric
#         if df[feature].dtype == 'object' and feature != 'Estimated_Key':
#             print(f"Skipping non-numeric feature: {feature}")
#             continue
        
#         # Use the numeric version for Estimated_Key for sorting, but preserve the original key name in output
#         if feature == 'Estimated_Key':
#             sort_feature = 'Estimated_Key_Numeric'
#             feature_to_store = 'Estimated_Key'
#         else:
#             sort_feature = feature
#             feature_to_store = feature
        
#         # Sort by feature value and extract song_ids with their values
#         try:
#             # Drop NaN values before sorting
#             sorted_df = df.dropna(subset=[sort_feature]).sort_values(by=sort_feature)
            
#             # Create list of tuples (song_id, feature_value)
#             if feature == 'Estimated_Key':
#                 # For Estimated_Key, store both original key and numeric value
#                 ordered_songs_with_values = [
#                     (row['song_id'], (row[feature_to_store], row[sort_feature])) 
#                     for _, row in sorted_df.iterrows()
#                 ]
#             else:
#                 ordered_songs_with_values = [
#                     (row['song_id'], row[feature_to_store]) 
#                     for _, row in sorted_df.iterrows()
#                 ]
            
#             # Store in dictionary
#             ordered_songs_dict[feature_to_store] = ordered_songs_with_values
            
#         except Exception as e:
#             print(f"Error processing feature {feature}: {e}")
#         print(feature)
#     # Create a new DataFrame for the ordered song IDs with values
#     result_df = pd.DataFrame({
#         'feature': list(ordered_songs_dict.keys()),
#         'ordered_song_ids_with_values': list(ordered_songs_dict.values())
#     })
    
#     # Save to CSV
#     result_df.to_csv('ordered_songs_by_feature_with_values.csv', index=False)
#     print(f"Saved ordered song IDs with values to ordered_songs_by_feature_with_values.csv")

# if __name__ == "__main__":
#     main()

In [76]:
import ast
def load_data1(csv_filename):
    """Load the CSV and parse the ordered song IDs as lists using pandas."""
    df = pd.read_csv(csv_filename)
    data = {row["feature"]: ast.literal_eval(row["ordered_song_ids"]) for _, row in df.iterrows()}
    return data
def load_data2(csv_filename):
    """Load the CSV and parse the ordered song IDs with values using pandas."""
    df = pd.read_csv(csv_filename)
    data = {row["feature"]: ast.literal_eval(row["ordered_song_ids_with_values"]) for _, row in df.iterrows()}
    return data

def load_data3(csv_filename):
    # Read the CSV into a DataFrame
    df = pd.read_csv(csv_filename)
    
    # Convert 'Estimated_Key' column to numerical data
    df['Estimated_Key'] = df['Estimated_Key'].apply(key_to_number)
    
    # Drop rows with NaN values in any column
    df = df.dropna()
    
    # Exclude non-numeric columns (Title, Artist, and Album) and select the numeric columns
    df_numeric = df.drop(columns=['Title', 'Artist', 'Album', 'song_id'])  # Exclude non-numeric and ID columns
    
    # Feature matrix with only numeric columns
    feature_matrix = df_numeric.values
    
    return feature_matrix

In [77]:
data1 = load_data1("C:\\Users\\Cheig\\capstone\\Spectralify\\reccomendation\\ordered_songs_by_feature.csv")

In [79]:
data_numpy = load_data3("C:\\Users\\Cheig\\OneDrive\\Desktop\\csvs\\data_with_ids.csv")

In [84]:
import pandas as pd

from collections import Counter

#Using ordered lists to find the nearest x songs with the option to use feature groups 
def find_nearest_songs(data, song_id, num_neighbors=1000, groups = None):
    """Find the 100 nearest songs for each feature and count occurrences."""
    song_counts = Counter()

    if groups is not None:
        mapping = {
        "basic": [
            "Duration_Seconds", "Tempo_BPM", "Beat_Regularity", "Beat_Density", "Beat_Strength"
        ],
        "pitch": [
            "Estimated_Key", "Key_Confidence", "Average_Pitch", "Pitch_Range", "pYIN_Pitch", "Harmonic_Salience"
        ],
        "spectral": [
            "Average_Spectral_Centroid", "Average_Spectral_Rolloff", "Average_Spectral_Bandwidth",
            "Spectral_Contrast_Mean", "Spectral_Entropy", "Spectral_Flatness", "Tonnetz_Features", "Polynomial_Coefficients"
        ],
        "energy": [
            "RMS_Energy_Mean", "RMS_Energy_Std", "Dynamic_Range", "Crest_Factor", "PCEN_Energy"
        ],
        "harmonic": [
            "Harmonic_Ratio", "Tonal_Energy_Ratio", "Variable_Q_Features", "Reassigned_Features"
        ],
        "rhythm": [
            "Groove_Consistency", "Pulse_Clarity", "Fourier_Tempogram", "Tempogram_Ratio", "Onset_Rate", "Onset_Strength_Mean"
        ],
        "structure": [
            "RQA_Features", "Path_Enhanced_Structure", "HPSS_Separation", "MultipleSegmentation_Boundaries"
        ]
        }
    

        
        columns = []
        for keyword in groups:
            columns.extend(mapping.get(keyword.lower(), []))
        for feature, song_list in data.items():
            if feature in columns:
                if song_id in song_list:
                    index = song_list.index(song_id)
                    nearest_songs = song_list[max(0, index - num_neighbors // 2): index + num_neighbors // 2]
                    song_counts.update(nearest_songs)
    
    
    else: 
        for feature, song_list in data.items():
            if song_id in song_list:
                index = song_list.index(song_id)
                nearest_songs = song_list[max(0, index - num_neighbors // 2): index + num_neighbors // 2]
                song_counts.update(nearest_songs)
    
    # Remove the original song ID itself from the count
    song_counts.pop(song_id, None)
    
    # Find the most common nearby song
    most_common_song = song_counts.most_common(20) if song_counts else (None, 0)
    
    return most_common_song

# Example usage
csv_filename = "C:\\Users\\Cheig\\capstone\\ordered_songs_by_feature.csv"  # Change this to your actual CSV filename
song_id_to_search = 0  # Change this to the song ID you're searching for

data = data1
most_common_song = find_nearest_songs(data, song_id_to_search )#groups=['basic', 'rhythm']
print(f"Most common nearby song: {most_common_song}")


Most common nearby song: [(44141, 20), (3272, 19), (17377, 19), (9102, 18), (15737, 18), (10489, 18), (3198, 17), (46504, 16), (45664, 16), (25878, 16), (3260, 16), (24195, 16), (42179, 16), (1843, 16), (29230, 16), (37719, 16), (23471, 16), (23556, 16), (17845, 15), (41684, 15)]


In [82]:
def centroid_method(feature_matrix, song_ids, num_recommendations=20):
    """
    Finds the closest songs to the centroid of the given songs.
    """
    centroid = np.mean(feature_matrix[song_ids], axis=0)
    distances = np.linalg.norm(feature_matrix - centroid, axis=1)
    
    nearest_indices = np.argsort(distances)
    recommended_songs = [idx for idx in nearest_indices if idx not in song_ids][:num_recommendations]
    
    return recommended_songs

print(centroid_method(data_numpy, [1,2,3]))

[14, 15613, 15633, 1001, 46508, 5542, 12842, 17361, 32124, 31987, 17591, 20135, 40055, 38038, 37898, 38092, 38203, 17378, 1787, 31587]


In [83]:
def multi_song_proximity_method(feature_matrix, song_ids, num_recommendations=20):
    """
    Finds songs that are closest to all input songs by summing their distances.
    """
    total_distance = np.sum([np.linalg.norm(feature_matrix - feature_matrix[song], axis=1) for song in song_ids], axis=0)
    
    nearest_indices = np.argsort(total_distance)
    recommended_songs = [idx for idx in nearest_indices if idx not in song_ids][:num_recommendations]
    
    return recommended_songs

print(multi_song_proximity_method(data_numpy, [1,2,3]))

[14, 5542, 1001, 26497, 15633, 46508, 15613, 20135, 62, 14931, 14583, 36455, 36423, 38139, 38027, 37897, 28308, 17361, 43732, 28914]


In [74]:
import pandas as pd
import numpy as np
from scipy.spatial import ConvexHull
import re
from collections import Counter
import random 
from sklearn.decomposition import PCA

def apply_pca(feature_matrix, n_components=20):
    pca = PCA(n_components=n_components)
    return pca.fit_transform(feature_matrix)

# Function to convert key strings to numerical values
def key_to_number(key_str):
    """Convert key strings to numerical values.
    Major keys: C=0, C#=1, D=2, ... B=11
    Minor keys: Am=12, A#m=13, Bm=14, ... G#m=23
    
    Handles various notations:
    - Full notation: 'C major', 'D minor'
    - Short notation: 'C', 'Dm'
    - Symbol notation: 'C#', 'F#m'
    """
    if pd.isna(key_str) or key_str == '':
        return np.nan
    
    # Standardize the key string (replace flats with equivalent sharps)
    key_str = key_str.replace('Ab', 'G#').replace('Bb', 'A#').replace('Cb', 'B').replace('Db', 'C#').replace('Eb', 'D#')
    key_str = key_str.strip()
    
    # Define all possible notes
    notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    
    # Case 1: Full notation (e.g., "C major", "D minor")
    full_notation = re.match(r'([A-G][#]?)\s*(major|minor)$', key_str)
    if full_notation:
        note, scale = full_notation.groups()
        note_idx = notes.index(note) if note in notes else -1
        if note_idx >= 0:
            if scale == 'major':
                return note_idx
            else:  # minor
                return note_idx + 3 + 12  # Relative minor is 3 semitones up, then add 12 to differentiate
    
    # Case 2: Short notation with explicit minor (e.g., "Dm", "F#m")
    short_minor = re.match(r'([A-G][#]?)m$', key_str)
    if short_minor:
        note = short_minor.group(1)
        note_idx = notes.index(note) if note in notes else -1
        if note_idx >= 0:
            return note_idx + 3 + 12  # Minor key
    
    # Case 3: Just the note name - assume major (e.g., "C", "F#")
    if key_str in notes:
        return notes.index(key_str)  # Major key
    
    print(f"Could not parse key: {key_str}")
    return np.nan

# Function to load data and extract numeric features
def load_data3(csv_filename):
    # Read the CSV into a DataFrame
    df = pd.read_csv(csv_filename)
    
    # Convert 'Estimated_Key' column to numerical data
    df['Estimated_Key'] = df['Estimated_Key'].apply(key_to_number)
    
    # Exclude non-numeric columns (Title, Artist, and Album) and select the numeric columns
    df_numeric = df.drop(columns=['Title', 'Artist', 'Album', 'song_id'])  # Exclude non-numeric and ID columns
    
    # Feature matrix with only numeric columns
    feature_matrix = df_numeric.values
    
    return feature_matrix, df

# Function for dimensional plane method with convex hull
def dimensional_plane_method(feature_matrix, song_ids, pca_components=20):
    """
    Finds songs within the convex hull formed by input songs in the feature space.
    """
    # Apply PCA to reduce dimensionality
    feature_matrix_reduced = apply_pca(feature_matrix, n_components=pca_components)
    
    points = feature_matrix_reduced[song_ids]  # Extract features of the input songs
    print(points)
    try:
        hull = ConvexHull(points)
    except QhullError:
        print("Qhull precision error: points may be coplanar or nearly identical.")
        return []
    
    inside_songs = []
    for i, song in enumerate(feature_matrix_reduced):
        if i not in song_ids and all(np.dot(eq[:-1], song) + eq[-1] <= 0 for eq in hull.equations):
            inside_songs.append(i)
    
    return inside_songs

# Load the data
data_numpy, df = load_data3("C:\\Users\\Cheig\\OneDrive\\Desktop\\csvs\\data_with_ids.csv")
x = data_numpy[~np.isnan(data_numpy)]
print(np.count_nonzero(np.isnan(x)))
print(x)
# Example usage of the dimensional plane method with the first song (index 0)
# You can specify any song_id(s) to be your input songs
input_song_ids = range(200)  # Example song indices
recommended_songs = dimensional_plane_method(x, input_song_ids)

# Print recommended song IDs within the convex hull
print(f"Songs within the convex hull: {recommended_songs}")

KeyboardInterrupt: 

In [None]:
import pandas as pd
import ast
import bisect
from collections import Counter


# same as find_nearest_songs but uses a binary search 
def find_nearest_songs(data, song_id, num_neighbors=100):
    """Find the 100 nearest songs using binary search based on values."""
    song_counts = Counter()
    
    for feature, song_list in data.items():
        song_ids = [song[0] for song in song_list]  # Extract song IDs
        values = [song[1] for song in song_list]  # Extract values
        
        index = bisect.bisect_left(song_ids, song_id)  # Find position using binary search
        if index < len(song_ids) and song_ids[index] == song_id:
            nearest_songs = song_ids[max(0, index - num_neighbors // 2): index + num_neighbors // 2]
            song_counts.update(nearest_songs)
    
    # Remove the original song ID itself from the count
    song_counts.pop(song_id, None)
    
    # Find the most common nearby song
    most_common_song= song_counts.most_common(10) if song_counts else (None, 0)
    
    return most_common_song,

# Example usage
csv_filename = "C:\\Users\\Cheig\\capstone\\ordered_songs_by_feature_with_values.csv"  # Change this to your actual CSV filename
song_id_to_search = 1  # Change this to the song ID you're searching for

data = load_data(csv_filename)
most_common_song = find_nearest_songs(data, song_id_to_search)
print(f"Most common nearby song: {most_common_song}")


KeyboardInterrupt: 

In [4]:
import pandas as pd
distance_matrix = pd.read_csv("distances2.csv", index_col=0)

In [8]:
def find_similar_songs(song_id, n=10):
    """
    Find the n most similar songs to the given song_id.
    
    Parameters:
    csv_file (str): Path to the CSV file containing the distance matrix
    song_id (str): ID of the song to find similar songs for
    n (int): Number of similar songs to return (default: 10)
    
    Returns:
    list: List of tuples (song_id, distance) for the n most similar songs
    """
    
    # Check if the song_id exists in the dataset
    if song_id not in distance_matrix.index:
        return f"Song ID '{song_id}' not found in the dataset."
    
    # Get the row for the specified song_id
    distances = distance_matrix.loc[song_id]
    
    # Sort the distances in ascending order (closest first)
    # Exclude the song itself (distance = 0)
    sorted_distances = distances.sort_values()
    
    # Remove the song itself (will be at index 0 with distance 0)
    if sorted_distances.index[0] == song_id or sorted_distances.iloc[0] == 0:
        sorted_distances = sorted_distances.iloc[1:]
    
    # Return the top n similar songs with their distances
    similar_songs = [(idx, dist) for idx, dist in 
                     zip(sorted_distances.index[:n], sorted_distances.values[:n])]
    
    return similar_songs

# Example usage
if __name__ == "__main__":
    # Replace with your actual CSV file path and song ID
    song_id = 100  # Replace with your actual song ID
    
    similar_songs = find_similar_songs(song_id)
    print(similar_songs)
    print(f"Top 10 songs most similar to {song_id}:")
    for i, (similar_id, distance) in enumerate(similar_songs, 1):
        print(f"{i}. Song ID: {similar_id}, Distance: {distance:.4f}")

[('90', 5.795593522907614), ('75', 6.005682414950607), ('24497', 6.02461700560429), ('189', 6.239342590205207), ('45212', 6.313814136778167), ('15733', 6.330860637777792), ('98', 6.393084541841931), ('45255', 6.451410435094632), ('107', 6.464909964167343), ('25828', 6.487933809127037)]
Top 10 songs most similar to 100:
1. Song ID: 90, Distance: 5.7956
2. Song ID: 75, Distance: 6.0057
3. Song ID: 24497, Distance: 6.0246
4. Song ID: 189, Distance: 6.2393
5. Song ID: 45212, Distance: 6.3138
6. Song ID: 15733, Distance: 6.3309
7. Song ID: 98, Distance: 6.3931
8. Song ID: 45255, Distance: 6.4514
9. Song ID: 107, Distance: 6.4649
10. Song ID: 25828, Distance: 6.4879


In [3]:
import pandas as pd
import argparse

def find_song_by_id(csv_file, song_id):
    try:
        with open(csv_file, 'r') as file:
            for line in file:
                if line.startswith(str(song_id) + ","):
                    print(line.strip())
                    return
        print(f"No song found with song_id: {song_id}")
    except FileNotFoundError:
        print(f"Error: The file '{csv_file}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    find_song_by_id("C:\\Users\\Cheig\\OneDrive\\Desktop\\csvs\\data_with_ids.csv", 11222)


11222,TE MATA,Feid,INTER SHIBUYA (FERXXO EDITION),113.12,C major,0.6812122948850745,1328.3975830078125,966.456787109375,3853.69873046875,138.19027210595792,84.46632711250275,553.263656988684,0.5814168377823409,0.1727958024910659,0.0518779902587347,5.78714412294581,0.1166161075234413,112.34714673913044,18.640080416880373,1.60007072135785,1.4009597301483154,27.36082000793177,3485.3446391149887,1872.8492680223808,7551.129045157597,4018.727730383597,3637.869828118626,1465.2302040459315,22.47957206700756,14.476959087449586,1.0,0.0038064462132751,0.0933149676440971,0.0302356409083095,0.0056167947021982,0.0567645261216662,0.0015421378393131,-0.0016682281797917,2.1284595522415648e-10,-4.88952000080811e-07,0.0003821593249041,-0.118290635394755,13.021915441176535,0.2083112150430679,0.1574039012193679,0.7005082368850708,3.829923152923584,0.2144597433609426,0.3889042381110932,0.1166161075234413,0.0660918429493904,1.4765149354934692,0.463833212852478,0.7566723227500916,1.87850284576416,15.738718032

In [2]:
import csv

def read_specific_row(filename, row_number):
    """Reads a specific row from a CSV file.

    Args:
        filename (str): The path to the CSV file.
        row_number (int): The row number to read (starting from 0).

    Returns:
        list: A list of strings representing the row, or None if the row 
              number is invalid.
    """
    try:
        with open(filename, 'r') as file:
            
            reader = csv.reader(file)
            print("eeeee")
            rows = list(reader)
            print("eeeee")
            if 0 <= row_number < len(rows):
                return rows[row_number]
            else:
                return None
    except FileNotFoundError:
        return None
    
# Example usage
filename = 'my_data.csv'
row_to_read = 2
data = read_specific_row("distances2.csv", 11111)

if data:
    print(f"Row {row_to_read}: {data}")
else:
    print(f"Row {row_to_read} not found or file does not exist.")

eeeee


KeyboardInterrupt: 