In [1]:
# Загрузка библиотек
import pandas as pd
import sqlite3

In [2]:
# Path of the SQLite database file; passed to sqlite3.connect() by the
# loading and query helpers in this notebook.
db_path = 'spotify_music.db'

In [3]:
# Load data from a CSV file and store it in SQLite
def load_data_to_db(csv_path):
    """
    Load a CSV file into the 'tracks' table of the SQLite database.

    The CSV is read into a DataFrame, written to the table (replacing any
    existing table of that name), and duplicate rows are then removed via
    remove_duplicates(). A missing CSV file and SQLite errors are reported
    with print() rather than raised.

    :param csv_path: Path to the CSV file to load.
    """
    # Read the CSV before touching the database so that a missing file is
    # reported cleanly and no connection has to be cleaned up.
    try:
        data = pd.read_csv(csv_path)
    except FileNotFoundError:
        print("CSV file not found. Please check the file path.")
        return
    print("Data loaded successfully.")

    table_name = 'tracks'
    # Bind the name up front: if connect() itself fails, the finally clause
    # must still be able to inspect it without raising UnboundLocalError.
    conn = None
    try:
        conn = sqlite3.connect(db_path)

        # Replace the table contents with the freshly loaded data
        data.to_sql(table_name, conn, if_exists='replace', index=False)
        print(f"Table '{table_name}' created and data inserted successfully.")

        # Drop duplicate rows from the freshly written table
        remove_duplicates(conn, table_name)

    except sqlite3.Error as e:
        print(f"Error occurred: {e}")
    finally:
        if conn is not None:
            conn.close()

In [4]:
# Remove duplicate rows from the given table
def remove_duplicates(conn, table_name):
    """
    Delete duplicate rows from a table, keeping one row per key.

    Rows are grouped by (track_name, artists, popularity); within each group
    only the row with the smallest rowid survives. The deletion is committed
    on success. SQLite errors are printed, not raised.

    :param conn: Open sqlite3 connection to operate on.
    :param table_name: Name of the table to deduplicate (interpolated into
        the SQL text as-is).
    """
    try:
        # Keep the lowest rowid of every (track_name, artists, popularity) group
        conn.execute(f"""
        DELETE FROM {table_name}
        WHERE rowid NOT IN (
            SELECT MIN(rowid)
            FROM {table_name}
            GROUP BY track_name, artists, popularity
        );
        """)
        conn.commit()
    except sqlite3.Error as e:
        print(f"Error occurred while removing duplicates: {e}")
    else:
        print("Duplicates removed successfully.")

In [5]:
# Generic helper for running SQL queries
def execute_query(query, result_processor, *, database=None):
    """
    Run a SQL query and hand all fetched rows to a processing callback.

    SQLite errors (including a failure to open the database) are printed,
    not raised. The connection is always closed before returning.

    :param query: SQL query to execute.
    :param result_processor: Callable invoked with the list of rows returned
        by cursor.fetchall().
    :param database: Optional database path to connect to; when None, the
        module-level db_path is used.
    """
    target = db_path if database is None else database
    # Bind the name before the try block: if connect() raises, the finally
    # clause must not fail with UnboundLocalError and hide the handled error.
    conn = None
    try:
        conn = sqlite3.connect(target)
        cursor = conn.cursor()

        # Execute the query and collect every row
        cursor.execute(query)
        results = cursor.fetchall()

        # Delegate presentation of the rows to the caller-supplied function
        result_processor(results)

    except sqlite3.Error as e:
        print(f"Error occurred: {e}")

    finally:
        if conn is not None:
            conn.close()

In [6]:
# Result handler for get_top_popular_tracks
def process_top_popular_tracks(results):
    """
    Print a numbered list of tracks with their artists and popularity.

    :param results: Sequence of (track_name, artists, popularity) rows.
    """
    print("\nTop 10 Popular Tracks:")
    if not results:
        print("No data found.")
        return

    for idx, row in enumerate(results, start=1):
        track_name, artists, popularity = row
        print(f"{idx}. {track_name} by {artists} - Popularity: {popularity}")


# Fetch the most popular tracks, without duplicate rows
def get_top_popular_tracks():
    """
    Query the ten highest-popularity distinct (track_name, artists,
    popularity) rows from the tracks table and print them via
    process_top_popular_tracks.
    """
    execute_query(
        query="""
    SELECT DISTINCT track_name, artists, popularity
    FROM tracks
    ORDER BY popularity DESC
    LIMIT 10;
    """,
        result_processor=process_top_popular_tracks,
    )

In [7]:
# Result handler for get_top_artists_by_genre
def process_top_artists_by_genre(results):
    """
    Print a numbered list of genre/artist pairs with their average popularity.

    :param results: Sequence of (genre, artist, avg_popularity) rows; the
        average is formatted with two decimal places.
    """
    print("\nTop Artists by Genre:")
    if not results:
        print("No data found.")
        return

    for idx, row in enumerate(results, start=1):
        genre, artist, avg_popularity = row
        print(f"{idx}. Genre: {genre}, Artist: {artist} - Average Popularity: {avg_popularity:.2f}")

In [8]:
# Find the most popular artists across genres
def get_top_artists_by_genre():
    """
    Average popularity per (track_genre, artists) group in the tracks table,
    take the ten groups with the highest average, and print them via
    process_top_artists_by_genre.
    """
    execute_query(
        query="""
    SELECT track_genre, artists, AVG(popularity) as avg_popularity
    FROM tracks
    GROUP BY track_genre, artists
    ORDER BY avg_popularity DESC
    LIMIT 10;
    """,
        result_processor=process_top_artists_by_genre,
    )

In [9]:
# Result handler for the explicit-content analysis
def process_explicit_tracks(results):
    """
    Print track count and average popularity per explicit-flag group.

    :param results: Sequence of (explicit, track_count, avg_popularity) rows.
        A flag equal to 1 is labelled 'Explicit'; anything else is labelled
        'Non-Explicit'.
    """
    print("\nAnalysis of Explicit and Non-Explicit Tracks:")
    if not results:
        print("No data found.")
        return

    for idx, row in enumerate(results, start=1):
        explicit, track_count, avg_popularity = row
        if explicit == 1:
            explicit_label = 'Explicit'
        else:
            explicit_label = 'Non-Explicit'
        print(f"{idx}. {explicit_label} Tracks: {track_count}, Avg Popularity: {avg_popularity:.2f}")

In [10]:
# Analyse tracks by their explicit flag
def analyze_explicit_tracks():
    """
    Count tracks and average their popularity per value of the explicit
    column, ordered by descending track count, and print the result via
    process_explicit_tracks.
    """
    execute_query(
        query="""
    SELECT explicit, COUNT(*) as track_count, AVG(popularity) as avg_popularity
    FROM tracks
    GROUP BY explicit
    ORDER BY track_count DESC;
    """,
        result_processor=process_explicit_tracks,
    )

In [11]:
# Result handler for the artist comparison
def process_compare_artists(results):
    """
    Print a numbered list of artists with track count and average popularity.

    :param results: Sequence of (artists, track_count, avg_popularity) rows;
        the average is formatted with two decimal places.
    """
    print("\nComparison of Top Artists by Track Count and Popularity:")
    if not results:
        print("No data found for artists.")
        return

    for idx, row in enumerate(results, start=1):
        artists, track_count, avg_popularity = row
        print(f"{idx}. Artist: {artists}, Track Count: {track_count}, Avg Popularity: {avg_popularity:.2f}")

In [12]:
# Compare artists by track count and popularity
def compare_artists():
    """
    Aggregate the tracks table per artists value (track count and average
    popularity), keep the ten rows ranked by average popularity and then by
    track count, both descending, and print them via process_compare_artists.
    """
    execute_query(
        query="""
    SELECT artists, COUNT(track_name) as track_count, AVG(popularity) as avg_popularity
    FROM tracks
    GROUP BY artists
    ORDER BY avg_popularity DESC, track_count DESC
    LIMIT 10;
    """,
        result_processor=process_compare_artists,
    )

In [13]:
# Run the analysis functions; each one queries the database and prints its
# own report, so the call order below is the order of the printed sections.
get_top_popular_tracks()
get_top_artists_by_genre()
analyze_explicit_tracks()
compare_artists()


Top 10 Popular Tracks:
1. Unholy (feat. Kim Petras) by Sam Smith;Kim Petras - Popularity: 100
2. Quevedo: Bzrp Music Sessions, Vol. 52 by Bizarrap;Quevedo - Popularity: 99
3. I'm Good (Blue) by David Guetta;Bebe Rexha - Popularity: 98
4. La Bachata by Manuel Turizo - Popularity: 98
5. Me Porto Bonito by Bad Bunny;Chencho Corleone - Popularity: 97
6. Tití Me Preguntó by Bad Bunny - Popularity: 97
7. Under The Influence by Chris Brown - Popularity: 96
8. Efecto by Bad Bunny - Popularity: 96
9. I Ain't Worried by OneRepublic - Popularity: 96
10. Ojitos Lindos by Bad Bunny;Bomba Estéreo - Popularity: 95

Top Artists by Genre:
1. Genre: dance, Artist: Sam Smith;Kim Petras - Average Popularity: 100.00
2. Genre: hip-hop, Artist: Bizarrap;Quevedo - Average Popularity: 99.00
3. Genre: latin, Artist: Manuel Turizo - Average Popularity: 98.00
4. Genre: latin, Artist: Bad Bunny;Chencho Corleone - Average Popularity: 97.00
5. Genre: latin, Artist: Bad Bunny - Average Popularity: 95.00
6. Genre: la