In [1]:
!pip install weaviate-client
!pip install pandas
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


In [4]:
import weaviate
from dotenv import dotenv_values


config = dotenv_values(".env")

client = weaviate.Client(
  url=config["WEAVIATE_URL"],  # URL of your Weaviate instance
  additional_headers={  
    "X-OpenAI-Api-Key": config["OPENAI_API_KEY"],
  }
)

## CREATE CLASS SCHEMA AND EXPORT TO JSON

In [70]:
import json

class_obj = {
    "class": "Songs",
    "vectorizer": "text2vec-openai",
    "moduleConfig": {
        "generative-openai": {
            "model": "gpt-3.5-turbo"
        }
    },
    "properties": [
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_name",
            "description": ""
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_id",
            "description": "The unique identifier of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_artist",
            "description": "The artist or artists associated with the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "lyrics",
            "description": "The lyrics of the song."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_popularity",
            "description": "The popularity score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_album_id",
            "description": "The unique identifier of the album to which the track belongs."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_album_name",
            "description": "The name of the album to which the track belongs."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "track_album_release_date",
            "description": "The release date of the album to which the track belongs."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "playlist_name",
            "description": "The name of the playlist the track belongs to."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "playlist_id",
            "description": "The unique identifier of the playlist the track belongs to."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "playlist_genre",
            "description": "The genre of the playlist the track belongs to."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "playlist_subgenre",
            "description": "The subgenre of the playlist the track belongs to."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "danceability",
            "description": "The danceability score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "energy",
            "description": "The energy score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "key",
            "description": "The key of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "loudness",
            "description": "The loudness score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "mode",
            "description": "The mode of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "speechiness",
            "description": "The speechiness score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "acousticness",
            "description": "The acousticness score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "instrumentalness",
            "description": "The instrumentalness score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "liveness",
            "description": "The liveness score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "valence",
            "description": "The valence score of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "tempo",
            "description": "The tempo of the track."
        },
        {
            "dataType": ["text"],
            "moduleConfig": {
                "text2vec-openai": {
                    "skip": False,
                    "vectorizePropertyName": False
                }
            },
            "name": "duration_ms",
            "description": "The duration of the track in milliseconds."
        }
    ]
}

class_name = class_obj["class"]
# Write JSON data to a text file with class name
file_name = f"{class_name.lower()}_class_schema.json"

# Convert class_obj to JSON
json_data = json.dumps(class_obj, indent=4)

with open(file_name, "w") as file:
    file.write(json_data)

print(f"Class object exported to {file_name}")

Class object exported to songs_class_schema.json


## CREATE CLASS

In [68]:
client.schema.create_class(class_obj)

## DELETE CLASS

In [66]:
client.schema.delete_class("Songs")

## GET CLASS OBJECTS

In [76]:
all_objects = client.data_object.get(class_name="Songs")
print(all_objects)

{'deprecations': [], 'objects': [{'class': 'Songs', 'creationTimeUnix': 1685739187061, 'id': '01356234-fe67-493b-a6b1-6192255a66f7', 'lastUpdateTimeUnix': 1685739187061, 'properties': {'acousticness': '0.853', 'danceability': '0.794', 'duration_ms': '342040', 'energy': '0.32', 'instrumentalness': '0.134', 'key': '1', 'language': 'en', 'liveness': '0.112', 'loudness': '-12.92', 'lyrics': "I spent the whole day in my head Do a little spring cleanin' I'm always too busy\u2005dreamin' Well,\u2005maybe I should\u2005wake up instead A lot of things\u2005I regret, but I just say I forget Why can't it just be easy? Why does everybody need me to stay? Oh, I hate the feeling When you're high but you're underneath the ceiling Got the cards in my hand, I hate dealing, yeah Get everything I need then I'm gone, but it ain't stealing Can I get a break? I wish that I could just get out my goddamn way What is there to say? There ain't a better time than today But maybe I'll lay down for a little, yeah 

# IMPORT SONG LYRICS TO DATABASE

In [72]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('spotify_songs.csv')

# Sort the DataFrame based on track_popularity column in descending order
df_sorted = df.sort_values(by='track_popularity', ascending=False)

# Select the top 100 rows
df_top_100 = df_sorted.head(100)

# Convert all columns to string
df_top_100 = df_top_100.astype(str)

# Get the property names from the CSV file
property_names = df_top_100.columns.tolist()



# Python client specific configurations can be set with `client.batch.configure`
# the settings can be applied to both `objects` AND `references`.
# You have to only set them once.
client.batch.configure(
  # `batch_size` takes an `int` value to enable auto-batching
  # (`None` is used for manual batching)
  batch_size=100,
  # dynamically update the `batch_size` based on import speed
  dynamic=False,
  # `timeout_retries` takes an `int` value to retry on time outs
  timeout_retries=3,
  # checks for batch-item creation errors
  # this is the default in weaviate-client >= 3.6.0
  callback=weaviate.util.check_batch_result,
  consistency_level=weaviate.data.replication.ConsistencyLevel.ALL,  # default QUORUM
)

with client.batch as batch:
  for _, row in df_top_100.iterrows():
    # Create an empty object
    obj_props = {}
    
    # Populate object properties from the CSV file
    for prop_name in property_names:
        obj_props[prop_name] = row[prop_name]
    
    # Add the object to the batch operation
    batch.add_data_object(obj_props, "Songs")