In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *
import urllib

delta_path = "dbfs:/user/hive/warehouse/authentication_credentials"
credentials_df = spark.read.format("delta").load(delta_path)

ACCESS_KEY = credentials_df.select('Access key ID').collect()[0]['Access key ID']
SECRET_KEY = credentials_df.select('Secret access key').collect()[0]['Secret access key']

ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [0]:
%sql
SET spark.databricks.delta.formatCheck.enabled=false;
SET spark.databricks.kinesis.listShards.enabled=false

key,value
spark.databricks.kinesis.listShards.enabled,False


In [0]:
from pyspark.sql.functions import expr
from pyspark.sql.functions import from_json
from pyspark.sql.types import StructType, StructField, StringType, IntegerType


stream_pin_df = spark \
    .readStream \
    .format('kinesis') \
    .option('streamName', 'Kinesis-Prod-Stream') \
    .option('initialPosition', 'latest') \
    .option('region', 'us-east-1') \
    .option('accessKeyId', ACCESS_KEY) \
    .option('secretAccessKey', SECRET_KEY) \
    .load()


pin_schema = StructType([
    StructField("index", IntegerType(), True),
    StructField("unique_id", StringType(), True),
    StructField("title", StringType(), True),
    StructField("description", StringType(), True),
    StructField("poster_name", StringType(), True),
    StructField("follower_count", StringType(), True),
    StructField("tag_list", StringType(), True),
    StructField("is_image_or_video", StringType(), True),
    StructField("image_src", StringType(), True),
    StructField("downloaded", IntegerType(), True),
    StructField("save_location", StringType(), True),
    StructField("category", StringType(), True),
    ])



stream_pin_df = stream_pin_df.filter(stream_pin_df.partitionKey == "pin-partition")
stream_pin_df = stream_pin_df.selectExpr("CAST(data as STRING) jsonData")
stream_pin_df = stream_pin_df.select(from_json("jsonData", pin_schema).alias("data")).select("data.*")

display(stream_pin_df)
    

index,unique_id,title,description,poster_name,follower_count,tag_list,is_image_or_video,image_src,downloaded,save_location,category
1704,5fbf9863-fb79-477c-a5b6-540c3020a55f,Christmas Trees From Pallet Wood | Holiday DIY,Christmas Trees From Pallet Wood | Holiday DIY: Deck the yard with some fun outdoor Christmas Trees! We made these merry and bright decorations from two old pallets we had lying…,Instructables,3M,"Pallet Wood Christmas Tree,Wooden Christmas Crafts,Diy Christmas Tree,Christmas Projects,Holiday Crafts,Wooden Xmas Trees,Different Christmas Trees,Pallet Tree,Christmas Kitchen",image,https://i.pinimg.com/originals/64/7b/ca/647bca35169b7c144604116c64bcba8a.png,1,Local save in /data/christmas,christmas
2482,08604f20-fa17-4b9a-9949-781717eca6cd,FORNT PORCH CHRISTMAS DECORATING IDEAS,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…","Life on Summerhill | Home, Holiday Decor & DIY Website",46k,"Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",video,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,1,Local save in /data/christmas,christmas
8930,a2999c28-f7b2-4577-af87-49d21d6d8a18,135 Sunflower Tattoo Ideas - [Best Rated Designs in 2021],We have put together the Ultimative Sunflower Tattoo Collection in 2020. Check out our highest rated handpicked Sunflower designs here!,Next Luxury,800k,"Sunflower Tattoo Sleeve,Sunflower Tattoo Shoulder,Sunflower Tattoo Small,Sunflower Tattoos,Sunflower Tattoo Design,Shoulder Tattoo,Sunflower Mandala Tattoo,Sunflower Tattoo Meaning,Form Tattoo",image,https://i.pinimg.com/originals/18/de/6c/18de6c81a8637e224c7d63dce1414ceb.jpg,1,Local save in /data/tattoos,tattoos
7294,9d8409ef-bea7-4133-a021-c58b9c042210,Foolproof Fall Fashion For Men You'll Love,Looking for the coolest fall fashion for men this season? Click to read this article on fall fashion for men to look sharp on LLEGANCE.,LLEGANCE | Workwear Fashion Woman Advice For Driven Professionals,28k,"Fashion 90s,Autumn Fashion,Fashion Ideas,Trendy Fashion,Fashion Photo,Fashion Outfits,Fashion Trends,Urban Fashion Men,Men's Casual Fashion",image,https://i.pinimg.com/originals/4c/bf/2d/4cbf2d6e04d2502612169220553f4cef.jpg,1,Local save in /data/mens-fashion,mens-fashion
808,23f67e42-db9f-4aae-8b82-cbec64ad9f4d,Chalk Pastel Hot Chocolate Art Project,Kids will use chalk pastels and black paper to create this bold and bright hot chocolate art project! Easy to follow tutorial included.,Projects with Kids,20k,"Hot Chocolate Art,Chocolate Crafts,Art Activities For Kids,Preschool Art,Preschool Winter,Christmas Activities,Winter Art Projects,Clay Art Projects,Chalk Pastel Art",image,https://i.pinimg.com/originals/5a/d3/82/5ad38202cf8acb022cb8f46ce2a78ad6.jpg,1,Local save in /data/art,art
771,a5021766-a8aa-4dc7-9857-4da6b8e3dc1a,African Sunset Shadow Tracing Art - Taming Little Monsters,"Use your mini world figures to create this beautiful African sunset. Your kids will love learning about shadows, angles and distortion in this fun art and STEM activity for kids.",Taming Little Monsters - Fun Activities for Kids,4k,"African Art Projects,Cool Art Projects,Projects For Kids,African Art For Kids,African Crafts Kids,Art Club Projects,Art Education Projects,Tracing Art,African Sunset",image,https://i.pinimg.com/originals/e3/aa/35/e3aa350f8f104d0e59f26d7f17ea7461.png,1,Local save in /data/art,art
4076,3a52d364-7c04-47cb-a3e5-56d9e2b77528,Phonics Activities Your Kids Will Love - The Literacy Nest,"Hi everyone! As a teacher using the Orton-Gillingham approach, I am constantly looking for phonics activities that my students will find fun and engaging. Using Orton-Gillingham…",The Literacy Nest,22k,"Literacy Games,Kindergarten Activities,Literacy Centers,Fun Phonics Activities,Listening Activities,Vocabulary Games,Literacy Stations,Letter Activities,Montessori Activities",image,https://i.pinimg.com/originals/58/8e/38/588e380b19942a71a86a69d9c9973d25.png,1,Local save in /data/education,education
1545,0c629541-cc5b-4b61-bd3f-613346893258,Oil Cleansing Method: What it is and why you should do it.,"The oil cleansing method is a simple, natural way to have clean skin without using toxic chemicals. It will leave your face radiant and soft!",Thank Your Body,85k,"Beauty Care,Diy Beauty,Beauty Hacks,Beauty Shop,Oil Cleansing Method,Goji,Korean Skincare Routine,Homemade Beauty Products,Organic Beauty",image,https://i.pinimg.com/originals/2c/1c/da/2c1cda7da86ee711536bbacfe89c75c9.jpg,1,Local save in /data/beauty,beauty
2293,1fd7d4cc-54c1-4542-9c1f-449cb4c875f4,Over 40 of the BEST Homemade Christmas Ornament Ideas,Over 40 of the BEST Homemade Christmas Ornaments...these easy Holiday crafts are so fun to make for Kids and Adults! Lots of great class party ideas!,Kitchen Fun With My 3 Sons,245k,"Diy Christmas Lights,Homemade Christmas Decorations,Christmas Crafts For Gifts,Mini Christmas Tree,Christmas Ornament Crafts,Homemade Christmas Gifts,Xmas Decorations,Simple Christmas,Handmade Christmas",image,https://i.pinimg.com/originals/d0/d3/a3/d0d3a31e87296224d5ba8896904b5b88.jpg,1,Local save in /data/christmas,christmas
10509,6a5f0e7e-1389-4e54-942f-4cb69f09f140,Flying Car,No description available Story format,TheSuperBOO!,89k,"Electric Off Road Vehicle,Cool Illusions,Flying Vehicles,Wow Video,Art And Craft Videos,Top Luxury Cars,Chevy Muscle Cars,Street Racing Cars,Funny Videos Clean",multi-video(story page format),https://i.pinimg.com/videos/thumbnails/originals/4a/d4/96/4ad4961606179854f1141a90f50f002c.0000001.jpg,1,Local save in /data/vehicles,vehicles


[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
File [0;32m<command-4087974376823059>, line 19[0m
[1;32m      3[0m [38;5;28;01mfrom[39;00m [38;5;21;01mpyspark[39;00m[38;5;21;01m.[39;00m[38;5;21;01msql[39;00m[38;5;21;01m.[39;00m[38;5;21;01mtypes[39;00m [38;5;28;01mimport[39;00m StructType, StructField, StringType, IntegerType
[1;32m      6[0m stream_pin_df [38;5;241m=[39m spark \
[1;32m      7[0m     [38;5;241m.[39mreadStream \
[1;32m      8[0m     [38;5;241m.[39mformat([38;5;124m'[39m[38;5;124mkinesis[39m[38;5;124m'[39m) \
[0;32m   (...)[0m
[1;32m     13[0m     [38;5;241m.[39moption([38;5;124m'[39m[38;5;124msecretAccessKey[39m[38;5;124m'[39m, SECRET_KEY) \
[1;32m     14[0m     [38;5;241m.[39mload()
[1;32m     17[0m pin_schema [38;5;241m=[39m StructType([
[1;32m     18[0m     StructField([38;5;124m"[39m

In [0]:
from pyspark.sql.functions import expr
from pyspark.sql.functions import from_json
from pyspark.sql.types import StructType, StructField, StringType, IntegerType


stream_geo_df = spark \
    .readStream \
    .format('kinesis') \
    .option('streamName', 'Kinesis-Prod-Stream') \
    .option('initialPosition', 'latest') \
    .option('region', 'us-east-1') \
    .option('accessKeyId', ACCESS_KEY) \
    .option('secretAccessKey', SECRET_KEY) \
    .load()

geo_schema = StructType([
    StructField("ind", IntegerType(), True),
    StructField("timestamp", StringType(), True),
    StructField("latitude", DoubleType(), True),
    StructField("longitude", DoubleType(), True),
    StructField("country", StringType(), True),
])


stream_geo_df = stream_geo_df.filter(stream_geo_df.partitionKey == "geo-partition")
stream_geo_df = stream_geo_df.selectExpr("CAST(data as STRING) jsonData")
stream_geo_df = stream_geo_df.select(from_json("jsonData", geo_schema).alias("data")).select("data.*")

display(stream_geo_df)
    

ind,timestamp,latitude,longitude,country
719,2018-10-29T04:05:12,-38.4062,-154.323,Armenia
1704,2018-11-29T04:46:39,-56.8702,-93.6232,Christmas Island
2482,2019-09-13T08:20:13,63.4563,-164.709,Bermuda
8930,2021-09-29T13:25:49,-89.4739,-176.154,Argentina
7294,2018-02-12T08:06:28,-76.2967,-136.501,Australia
808,2019-01-03T15:43:12,-71.6856,-179.126,Albania
771,2018-06-21T08:42:57,-29.1712,-107.111,Montserrat
4076,2019-06-07T20:13:50,-67.2157,27.8139,Mauritania
1545,2022-06-17T05:47:12,-30.18,-72.3784,Anguilla
2293,2022-03-21T10:46:53,-87.7946,-159.647,British Virgin Islands


[0;31m---------------------------------------------------------------------------[0m
[0;31mTypeError[0m                                 Traceback (most recent call last)
File [0;32m<command-4087974376823060>, line 11[0m
[1;32m      1[0m stream_geo_df [38;5;241m=[39m spark \
[1;32m      2[0m     [38;5;241m.[39mreadStream \
[1;32m      3[0m     [38;5;241m.[39mformat([38;5;124m'[39m[38;5;124mkinesis[39m[38;5;124m'[39m) \
[0;32m   (...)[0m
[1;32m      8[0m     [38;5;241m.[39moption([38;5;124m'[39m[38;5;124msecretAccessKey[39m[38;5;124m'[39m, SECRET_KEY) \
[1;32m      9[0m     [38;5;241m.[39mload()
[0;32m---> 11[0m geo_schema [38;5;241m=[39m StructField([
[1;32m     12[0m     StructField([38;5;124m"[39m[38;5;124mindex[39m[38;5;124m"[39m, IntegerType(), [38;5;28;01mTrue[39;00m),
[1;32m     13[0m     StructField([38;5;124m"[39m[38;5;124mtimestamp[39m[38;5;124m"[39m, StringType(), [38;5;28;01mTrue[39;00m),
[1;32m     14[0m     S

In [0]:
from pyspark.sql.functions import expr
from pyspark.sql.functions import from_json
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

stream_user_df = spark \
    .readStream \
    .format('kinesis') \
    .option('streamName', 'Kinesis-Prod-Stream') \
    .option('initialPosition', 'latest') \
    .option('region', 'us-east-1') \
    .option('accessKeyId', ACCESS_KEY) \
    .option('secretAccessKey', SECRET_KEY) \
    .load()

user_schema = StructType([
    StructField("ind", IntegerType(), True),
    StructField("first_name", StringType(), True),
    StructField("last_name", StringType(), True),
    StructField("age", IntegerType(), True),
    StructField("date_joined", StringType(), True),
])

stream_user_df = stream_user_df.filter(stream_user_df.partitionKey == "user-partition")
stream_user_df = stream_user_df.selectExpr("CAST(data as STRING) jsonData")
stream_user_df = stream_user_df.select(from_json("jsonData", user_schema).alias("data")).select("data.*")

display(stream_user_df)

ind,first_name,last_name,age,date_joined
719,Alicia,Avila,20,2016-01-26T01:49:23
1704,Barry,Lynn,20,2015-10-28T05:00:33
2482,David,Moss,22,2016-03-01T07:11:48
8930,Andrew,Anderson,23,2015-11-28T11:52:37
7294,Dawn,Anderson,20,2016-12-31T00:06:36
808,Aaron,Bartlett,21,2015-11-24T02:15:36
771,Brittany,Butler,32,2016-03-10T04:11:31
4076,Larry,Pineda,20,2015-10-23T22:47:39
1545,Jonathan,Avila,46,2015-11-27T23:11:21
2293,Alex,Bean,24,2016-01-11T23:08:27


In [0]:
from pyspark.sql.functions import regexp_replace


def clean_pin_data(stream_pin_df):
    pin_cleaned_df = stream_pin_df.replace({'No description available Story format': None,}, subset=['description'])
    pin_cleaned_df = pin_cleaned_df.replace({'N,o, ,T,a,g,s, ,A,v,a,i,l,a,b,l,e': None,}, subset=['tag_list'])
    pin_cleaned_df = pin_cleaned_df.replace({'No Title Data Available': None,}, subset=['title'])
    pin_cleaned_df = pin_cleaned_df.replace({'User Info Error': None, }, subset=['follower_count'])
    pin_cleaned_df = pin_cleaned_df.replace({'Image src error.': None, }, subset=['image_src'])
    pin_cleaned_df = pin_cleaned_df.replace({'User Info Error': None, }, subset=['poster_name'])

    cleaned_pin_data_df = pin_cleaned_df.withColumn('follower_count', pin_cleaned_df['follower_count'].cast('int'))
    cleaned_pin_data_df = cleaned_pin_data_df.withColumn('save_location', regexp_replace('save_location', 'Local save in', ''))
    cleaned_pin_data_df = cleaned_pin_data_df.withColumn('follower_count', regexp_replace('follower_count', 'k', '000'))
    cleaned_pin_data_df = cleaned_pin_data_df.withColumn('follower_count', regexp_replace('follower_count', 'M', '000000'))

    cleaned_pin_data_df = cleaned_pin_data_df.withColumnRenamed('index', 'ind')
    cleaned_pin_data_df = cleaned_pin_data_df.select('ind', 'unique_id', 'title', 'description', 'follower_count', 'poster_name', 'tag_list', 'is_image_or_video', 'image_src', 'save_location', 'category')

    return cleaned_pin_data_df

In [0]:
pin_df_cleaned = clean_pin_data(stream_pin_df)
display(pin_df_cleaned)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category
1704,5fbf9863-fb79-477c-a5b6-540c3020a55f,Christmas Trees From Pallet Wood | Holiday DIY,Christmas Trees From Pallet Wood | Holiday DIY: Deck the yard with some fun outdoor Christmas Trees! We made these merry and bright decorations from two old pallets we had lying…,,Instructables,"Pallet Wood Christmas Tree,Wooden Christmas Crafts,Diy Christmas Tree,Christmas Projects,Holiday Crafts,Wooden Xmas Trees,Different Christmas Trees,Pallet Tree,Christmas Kitchen",image,https://i.pinimg.com/originals/64/7b/ca/647bca35169b7c144604116c64bcba8a.png,/data/christmas,christmas
2482,08604f20-fa17-4b9a-9949-781717eca6cd,FORNT PORCH CHRISTMAS DECORATING IDEAS,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",,"Life on Summerhill | Home, Holiday Decor & DIY Website","Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",video,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,/data/christmas,christmas
8930,a2999c28-f7b2-4577-af87-49d21d6d8a18,135 Sunflower Tattoo Ideas - [Best Rated Designs in 2021],We have put together the Ultimative Sunflower Tattoo Collection in 2020. Check out our highest rated handpicked Sunflower designs here!,,Next Luxury,"Sunflower Tattoo Sleeve,Sunflower Tattoo Shoulder,Sunflower Tattoo Small,Sunflower Tattoos,Sunflower Tattoo Design,Shoulder Tattoo,Sunflower Mandala Tattoo,Sunflower Tattoo Meaning,Form Tattoo",image,https://i.pinimg.com/originals/18/de/6c/18de6c81a8637e224c7d63dce1414ceb.jpg,/data/tattoos,tattoos
7294,9d8409ef-bea7-4133-a021-c58b9c042210,Foolproof Fall Fashion For Men You'll Love,Looking for the coolest fall fashion for men this season? Click to read this article on fall fashion for men to look sharp on LLEGANCE.,,LLEGANCE | Workwear Fashion Woman Advice For Driven Professionals,"Fashion 90s,Autumn Fashion,Fashion Ideas,Trendy Fashion,Fashion Photo,Fashion Outfits,Fashion Trends,Urban Fashion Men,Men's Casual Fashion",image,https://i.pinimg.com/originals/4c/bf/2d/4cbf2d6e04d2502612169220553f4cef.jpg,/data/mens-fashion,mens-fashion
808,23f67e42-db9f-4aae-8b82-cbec64ad9f4d,Chalk Pastel Hot Chocolate Art Project,Kids will use chalk pastels and black paper to create this bold and bright hot chocolate art project! Easy to follow tutorial included.,,Projects with Kids,"Hot Chocolate Art,Chocolate Crafts,Art Activities For Kids,Preschool Art,Preschool Winter,Christmas Activities,Winter Art Projects,Clay Art Projects,Chalk Pastel Art",image,https://i.pinimg.com/originals/5a/d3/82/5ad38202cf8acb022cb8f46ce2a78ad6.jpg,/data/art,art
771,a5021766-a8aa-4dc7-9857-4da6b8e3dc1a,African Sunset Shadow Tracing Art - Taming Little Monsters,"Use your mini world figures to create this beautiful African sunset. Your kids will love learning about shadows, angles and distortion in this fun art and STEM activity for kids.",,Taming Little Monsters - Fun Activities for Kids,"African Art Projects,Cool Art Projects,Projects For Kids,African Art For Kids,African Crafts Kids,Art Club Projects,Art Education Projects,Tracing Art,African Sunset",image,https://i.pinimg.com/originals/e3/aa/35/e3aa350f8f104d0e59f26d7f17ea7461.png,/data/art,art
4076,3a52d364-7c04-47cb-a3e5-56d9e2b77528,Phonics Activities Your Kids Will Love - The Literacy Nest,"Hi everyone! As a teacher using the Orton-Gillingham approach, I am constantly looking for phonics activities that my students will find fun and engaging. Using Orton-Gillingham…",,The Literacy Nest,"Literacy Games,Kindergarten Activities,Literacy Centers,Fun Phonics Activities,Listening Activities,Vocabulary Games,Literacy Stations,Letter Activities,Montessori Activities",image,https://i.pinimg.com/originals/58/8e/38/588e380b19942a71a86a69d9c9973d25.png,/data/education,education
1545,0c629541-cc5b-4b61-bd3f-613346893258,Oil Cleansing Method: What it is and why you should do it.,"The oil cleansing method is a simple, natural way to have clean skin without using toxic chemicals. It will leave your face radiant and soft!",,Thank Your Body,"Beauty Care,Diy Beauty,Beauty Hacks,Beauty Shop,Oil Cleansing Method,Goji,Korean Skincare Routine,Homemade Beauty Products,Organic Beauty",image,https://i.pinimg.com/originals/2c/1c/da/2c1cda7da86ee711536bbacfe89c75c9.jpg,/data/beauty,beauty
2293,1fd7d4cc-54c1-4542-9c1f-449cb4c875f4,Over 40 of the BEST Homemade Christmas Ornament Ideas,Over 40 of the BEST Homemade Christmas Ornaments...these easy Holiday crafts are so fun to make for Kids and Adults! Lots of great class party ideas!,,Kitchen Fun With My 3 Sons,"Diy Christmas Lights,Homemade Christmas Decorations,Christmas Crafts For Gifts,Mini Christmas Tree,Christmas Ornament Crafts,Homemade Christmas Gifts,Xmas Decorations,Simple Christmas,Handmade Christmas",image,https://i.pinimg.com/originals/d0/d3/a3/d0d3a31e87296224d5ba8896904b5b88.jpg,/data/christmas,christmas
10509,6a5f0e7e-1389-4e54-942f-4cb69f09f140,Flying Car,,,TheSuperBOO!,"Electric Off Road Vehicle,Cool Illusions,Flying Vehicles,Wow Video,Art And Craft Videos,Top Luxury Cars,Chevy Muscle Cars,Street Racing Cars,Funny Videos Clean",multi-video(story page format),https://i.pinimg.com/videos/thumbnails/originals/4a/d4/96/4ad4961606179854f1141a90f50f002c.0000001.jpg,/data/vehicles,vehicles


In [0]:
from pyspark.sql.functions import array, col, to_timestamp

def cleaned_geo_df(stream_geo_df):
    clean_geo_df = stream_geo_df.withColumn('coordinates', array(col('longitude'), col('latitude')))
    clean_geo_df = clean_geo_df.drop('longitude', 'latitude')

    clean_geo_df = clean_geo_df.withColumn('timestamp', to_timestamp(col('timestamp')))
    clean_geo_df = clean_geo_df.select('ind', 'country', 'coordinates', 'timestamp')

    return clean_geo_df

In [0]:
cleaned_geo_df = cleaned_geo_df(stream_geo_df)
display(cleaned_geo_df)

ind,country,coordinates,timestamp
1704,Christmas Island,"List(-93.6232, -56.8702)",2018-11-29T04:46:39Z
2482,Bermuda,"List(-164.709, 63.4563)",2019-09-13T08:20:13Z
8930,Argentina,"List(-176.154, -89.4739)",2021-09-29T13:25:49Z
7294,Australia,"List(-136.501, -76.2967)",2018-02-12T08:06:28Z
808,Albania,"List(-179.126, -71.6856)",2019-01-03T15:43:12Z
771,Montserrat,"List(-107.111, -29.1712)",2018-06-21T08:42:57Z
4076,Mauritania,"List(27.8139, -67.2157)",2019-06-07T20:13:50Z
1545,Anguilla,"List(-72.3784, -30.18)",2022-06-17T05:47:12Z
2293,British Virgin Islands,"List(-159.647, -87.7946)",2022-03-21T10:46:53Z
10509,Gibraltar,"List(-24.2977, -67.187)",2019-03-06T12:21:56Z


[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
File [0;32m<command-7717831699936065>, line 1[0m
[0;32m----> 1[0m cleaned_geo_df [38;5;241m=[39m cleaned_geo(stream_geo_df)
[1;32m      2[0m display(cleaned_geo_df)

[0;31mNameError[0m: name 'cleaned_geo' is not defined

In [0]:
from pyspark.sql.functions import concat

def cleaned_user_df(stream_user_df):
    clean_user_df = stream_user_df.withColumn('user_name', concat('first_name', 'last_name'))
    clean_user_df = clean_user_df.drop('first_name', 'last_name')
    clean_user_df = clean_user_df.withColumn('user_name', regexp_replace("user_name", "([a-z]) ([A-Z])", r"\1 \2")) 

    clean_user_df = clean_user_df.withColumn('date_joined', to_timestamp(col('date_joined')))
    clean_user_df = clean_user_df.select('ind' , 'user_name', 'age', 'date_joined')
    return clean_user_df

In [0]:
cleaned_user_stream = cleaned_user_df(stream_user_df)
display(cleaned_user_stream)

ind,user_name,age,date_joined
771,BrittanyButler,32,2016-03-10T04:11:31Z
4076,LarryPineda,20,2015-10-23T22:47:39Z
1545,JonathanAvila,46,2015-11-27T23:11:21Z
2293,AlexBean,24,2016-01-11T23:08:27Z
10509,BrittanyThompson,49,2016-04-22T20:36:02Z
2015,ChristopherBradshaw,27,2016-03-08T13:38:37Z
1094,AnnElliott,20,2016-07-02T02:32:30Z
603,BrandonJackson,44,2016-10-29T02:07:21Z
7768,ChristineCortez,23,2015-12-01T18:15:02Z
5630,CarlosEstrada,22,2016-02-09T17:01:38Z


In [0]:
dbutils.fs.rm("/tmp/kinesis/_checkpoints/", True)

pin_df_cleaned.writeStream \
    .format("delta") \
    .outputMode("append") \
    .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
    .table("0eaf46a0829f_pin_table")

<pyspark.sql.streaming.query.StreamingQuery at 0x7fc7f8a710f0>

In [0]:


cleaned_geo_df.writeStream \
    .format("delta") \
    .outputMode("append") \
    .option("checkpointLocation", "/tmp/kinesis/_checkpoints/cleaned_geo_df/") \
    .table("0eaf46a0829f_geo_table")

<pyspark.sql.streaming.query.StreamingQuery at 0x7fc7f818f070>

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
File [0;32m<command-7717831699936068>, line 1[0m
[0;32m----> 1[0m [43mdf[49m[38;5;241m.[39mwriteStream \
[1;32m      2[0m     [38;5;241m.[39mformat([38;5;124m"[39m[38;5;124mdelta[39m[38;5;124m"[39m) \
[1;32m      3[0m     [38;5;241m.[39moutputMode([38;5;124m"[39m[38;5;124mappend[39m[38;5;124m"[39m) \
[1;32m      4[0m     [38;5;241m.[39moption([38;5;124m"[39m[38;5;124mcheckpointLocation[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124m/tmp/kinesis/_checkpoints/[39m[38;5;124m"[39m) \
[1;32m      5[0m     [38;5;241m.[39mtoTable([38;5;124m"[39m[38;5;124m0eaf46a0829f_pin_table[39m[38;5;124m"[39m)

[0;31mNameError[0m: name 'df' is not defined

In [0]:


cleaned_user_stream.writeStream \
    .format("delta") \
    .outputMode("append") \
    .option("checkpointLocation", "/tmp/kinesis/_checkpoints/cleaned_user_stream/") \
    .table("0eaf46a0829f_user_table")

<pyspark.sql.streaming.query.StreamingQuery at 0x7fc7f8a736d0>