In [1]:
from pyspark.sql import SparkSession

# Build (or reuse) the Spark session for this notebook. The MySQL Connector/J
# jar is handed to Spark through the "spark.jars" setting.
spark = (
    SparkSession.builder
    .appName("MySQL Integration")
    .config("spark.jars", "C:/Program Files (x86)/MySQL/mysql-connector-j-8.3.0/mysql-connector-j-8.3.0.jar")
    .getOrCreate()
)


In [2]:
# Define MySQL database connection parameters.
# Credentials are read from the environment instead of being stored in the
# notebook: set MYSQL_USER / MYSQL_PASSWORD before starting the kernel, or
# type the password at the prompt when MYSQL_PASSWORD is not set.
import os
from getpass import getpass

database_url = "jdbc:mysql://localhost:3306/classicmodels"
table_name = "customers"
database_properties = {
    "user": os.environ.get("MYSQL_USER", "root"),
    "password": os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password: "),
    # Connector/J 8.x driver class; the legacy "com.mysql.jdbc.Driver" name
    # is deprecated and only kept as a compatibility shim.
    "driver": "com.mysql.cj.jdbc.Driver",
}

# Load data from the MySQL database table into a Spark DataFrame
df = spark.read.jdbc(url=database_url, table=table_name, properties=database_properties)

# Show the DataFrame
df.show()

+--------------+--------------------+---------------+----------------+-----------------+--------------------+--------------------+-------------+--------+----------+---------+----------------------+-----------+
|customerNumber|        customerName|contactLastName|contactFirstName|            phone|        addressLine1|        addressLine2|         city|   state|postalCode|  country|salesRepEmployeeNumber|creditLimit|
+--------------+--------------------+---------------+----------------+-----------------+--------------------+--------------------+-------------+--------+----------+---------+----------------------+-----------+
|           103|   Atelier graphique|        Schmitt|         Carine |       40.32.2555|      54, rue Royale|                NULL|       Nantes|    NULL|     44000|   France|                  1370|   21000.00|
|           112|  Signal Gift Stores|           King|            Jean|       7025551838|     8489 Strong St.|                NULL|    Las Vegas|      NV|     83

In [3]:
# Turn the Spark DataFrame `df` into a pandas DataFrame, then print its
# plain-text representation.
pandas_df = df.toPandas()
print(pandas_df)


     customerNumber                    customerName contactLastName  \
0               103               Atelier graphique         Schmitt   
1               112              Signal Gift Stores            King   
2               114      Australian Collectors, Co.        Ferguson   
3               119               La Rochelle Gifts         Labrune   
4               121              Baane Mini Imports      Bergulfsen   
..              ...                             ...             ...   
117             486    Motor Mint Distributors Inc.         Salazar   
118             487        Signal Collectibles Ltd.          Taylor   
119             489  Double Decker Gift Stores, Ltd           Smith   
120             495            Diecast Collectables          Franco   
121             496               Kelly's Gift Shop         Snowden   

    contactFirstName           phone                  addressLine1  \
0            Carine       40.32.2555                54, rue Royale   
1      

In [5]:
# Bare expression: the notebook renders `pandas_df` as this cell's output.
pandas_df

Unnamed: 0,customerNumber,customerName,contactLastName,contactFirstName,phone,addressLine1,addressLine2,city,state,postalCode,country,salesRepEmployeeNumber,creditLimit
0,103,Atelier graphique,Schmitt,Carine,40.32.2555,"54, rue Royale",,Nantes,,44000,France,1370.0,21000.00
1,112,Signal Gift Stores,King,Jean,7025551838,8489 Strong St.,,Las Vegas,NV,83030,USA,1166.0,71800.00
2,114,"Australian Collectors, Co.",Ferguson,Peter,03 9520 4555,636 St Kilda Road,Level 3,Melbourne,Victoria,3004,Australia,1611.0,117300.00
3,119,La Rochelle Gifts,Labrune,Janine,40.67.8555,"67, rue des Cinquante Otages",,Nantes,,44000,France,1370.0,118200.00
4,121,Baane Mini Imports,Bergulfsen,Jonas,07-98 9555,Erling Skakkes gate 78,,Stavern,,4110,Norway,1504.0,81700.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,486,Motor Mint Distributors Inc.,Salazar,Rosa,2155559857,11328 Douglas Av.,,Philadelphia,PA,71270,USA,1323.0,72600.00
118,487,Signal Collectibles Ltd.,Taylor,Sue,4155554312,2793 Furth Circle,,Brisbane,CA,94217,USA,1165.0,60300.00
119,489,"Double Decker Gift Stores, Ltd",Smith,Thomas,(171) 555-7555,120 Hanover Sq.,,London,,WA1 1DP,UK,1501.0,43300.00
120,495,Diecast Collectables,Franco,Valarie,6175552555,6251 Ingle Ln.,,Boston,MA,51003,USA,1188.0,85100.00


In [6]:
# First, we need to import the necessary libraries: Keras for the IMDb dataset and pandas for dataframe manipulation
import pandas as pd
from keras.datasets import imdb

# Fetch the IMDb reviews, which arrive already divided into training and test
# splits. Only the `top_words` most frequent words are kept (10000 here, for
# demonstration purposes).
top_words = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

# Every review is a sequence of integers, one integer per word. To read a
# review as text we need the reverse lookup: `word_index` maps word -> integer,
# so pair its values with its keys to obtain integer -> word.
word_index = imdb.get_word_index()
int_to_word = dict(zip(word_index.values(), word_index.keys()))

# Decode the first training review. Each id is shifted down by 3 before the
# lookup (indices 0, 1 and 2 are reserved for special tokens); ids with no
# matching word are rendered as '?'.
first_review = [int_to_word.get(token_id - 3, '?') for token_id in x_train[0]]

# Wrap that single decoded review in a one-row, one-column dataframe.
# A real workflow would likely decode more reviews or shape the data differently.
df = pd.DataFrame([' '.join(first_review)], columns=['review'])

df


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


Unnamed: 0,review
0,? this film was just brilliant casting locatio...


In [7]:
import pandas as pd
from keras.datasets import imdb

# Parameters
top_words = 10000
num_reviews = 100  # Adjust based on your environment's capabilities

# Load dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

# Prepare word index: `word_index` maps word -> integer; the reverse lookup is
# shifted by 3 so that ids 0-3 stay free for the special tokens assigned below.
word_index = imdb.get_word_index()
int_to_word = {value + 3: key for key, value in word_index.items()}
# Mapping special tokens
int_to_word[0] = '<PAD>'
int_to_word[1] = '<START>'
int_to_word[2] = '<UNK>'
int_to_word[3] = '<UNUSED>'

# Convert the first `num_reviews` reviews to text.
# Slicing (instead of indexing over range(num_reviews)) keeps the reviews and
# the sentiment labels the same length even if `num_reviews` exceeds the
# dataset size, and the distinct loop names avoid shadowing one index with
# another. Ids missing from the lookup are rendered as '?'.
reviews_text = [
    ' '.join(int_to_word.get(token_id, '?') for token_id in encoded_review)
    for encoded_review in x_train[:num_reviews]
]

# Create DataFrame: one row per review with its sentiment label
df_reviews = pd.DataFrame({'review': reviews_text, 'sentiment': y_train[:num_reviews]})

# Show the DataFrame head to verify
df_reviews.head()


Unnamed: 0,review,sentiment
0,<START> this film was just brilliant casting l...,1
1,<START> big hair big boobs bad music and a gia...,0
2,<START> this has to be one of the worst films ...,0
3,<START> the <UNK> <UNK> at storytelling the tr...,1
4,<START> worst mistake of my life br br i picke...,0
