# Imports

In [None]:
import pandas as pd
from screenshot import Screenshot
from ocr import read_image, threeline_format
import glob
import os
import matplotlib.pyplot as plt
import json
from spotify import search_song, get_token, add_to_liked_songs

from thefuzz import fuzz

# 0 Load Necessary Paths in JSON

In [None]:
# Load the notebook configuration (tool paths and Spotify API settings) from params.json.
# The encoding is stated explicitly so the result does not depend on the platform default.
with open("params.json", encoding="utf-8") as param_data:
    params = json.load(param_data)

# Echo the settings as a sanity check, but hide anything that looks like a secret
# (e.g. SPOTIFY_CLIENT_SECRET) so credentials do not end up in saved cell output.
for key, value in params.items():
    print(key, "<hidden>" if "SECRET" in key.upper() else value)

# 1 Process the Screenshots

## 1.1 Collect the screenshot file paths into a list

* ```screenshot_path``` is a directory that contains only Pandora screenshots
* ```screenshot_ext``` is the file extension (```jpg```, by default) of the screenshots

In [None]:
# Where the screenshots live, and the file extension (without the dot) used to find them.
screenshot_path, screenshot_ext = "./example/screenshots", "jpg"

In [None]:
# Match every file in the screenshot directory that carries the configured extension.
screenshot_glob = os.path.join(screenshot_path, "*." + screenshot_ext)
screenshot_paths = glob.glob(screenshot_glob)
print(f"{len(screenshot_paths)} Screenshots Found!")

## 1.2 Create "Screenshot" objects from the filepaths

In [None]:
# Wrap each screenshot file path in a Screenshot object (one object per file, same order).
screenshot_objs = list(map(Screenshot, screenshot_paths))

Let's take a look at the top and bottom of the first image

In [None]:
# Preview the first screenshot: the first and the last 400 rows of its image array,
# side by side, with the tick labels hidden.
first_img = screenshot_objs[0].img
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
for ax, region, title in (
    (ax1, first_img[:400, :], "Top of screenshot"),
    (ax2, first_img[-400:, :], "Bottom of screenshot"),
):
    ax.imshow(region)
    ax.set(yticklabels=[])
    ax.set(xticklabels=[])
    ax.set_title(title)

fig.show()

### 1.2.1 Crop the images 
* Crop the image from the top (Remove status bar and top banner)
* Crop the image from the bottom (remove bottom banner)
* Crop from the left (Remove album art)

In [None]:
# Apply the same fixed crop margins to every screenshot. The values are presumably
# pixel counts for the top, bottom and left edges (per the parameter names); the
# return value of crop() is not used, so it is expected to update the object itself.
crop_margins = {"upper_pixel": 128, "lower_pixel": 168, "left_pixel": 130}
for shot in screenshot_objs:
    shot.crop(**crop_margins)

In [None]:
# Same preview as before the crop step: first and last 400 rows of the first
# screenshot's image array, shown side by side without tick labels.
preview_img = screenshot_objs[0].img
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
panels = (
    (ax1, preview_img[:400, :], "Top of screenshot"),
    (ax2, preview_img[-400:, :], "Bottom of screenshot"),
)
for ax, region, title in panels:
    ax.imshow(region)
    ax.set(yticklabels=[])
    ax.set(xticklabels=[])
    ax.set_title(title)

fig.show()

### 1.2.2 Identify the Horizontal Separators (light gray lines between songs)

In [None]:
# Run separator detection on every screenshot; the result is kept on the object
# (nothing is returned to this cell).
for shot in screenshot_objs:
    shot.find_seperators()

### 1.2.3 Use the separators to crop out each song

In [None]:
# Split every screenshot into its per-song images; the pieces are read back
# from `partitioned_imgs` in the preview cell that follows.
for shot in screenshot_objs:
    shot.partition()

In [None]:
# Preview up to nine of the images produced by partitioning the first screenshot.
partitioned_images = screenshot_objs[0].partitioned_imgs[:9]

fig, axes = plt.subplots(3, 3, figsize=(8, 4))
# plt.subplots hands back a 3x3 grid of axes here; flatten it row by row into a plain list.
axes = [cell for grid_row in axes for cell in grid_row]
# zip stops at the shorter sequence, so fewer than nine partitions simply leave axes unused.
for ax, img in zip(axes, partitioned_images):
    ax.imshow(img, cmap="gray")
    ax.set(yticklabels=[])
    ax.set(xticklabels=[])

fig.show()

### Dump the partitioned songs to a directory

In [None]:
# Target directory for the partitioned song images.
out_dir = "./example/partitioned"
# Create it (including missing parents); an already existing directory is not an error.
os.makedirs(out_dir, exist_ok=True)

In [None]:
# Have every screenshot write its partitioned images into the output directory.
for shot in screenshot_objs:
    shot.write_partitions(out_dir)

# 2 Process the Images

## 2.1 Aggregate the partitioned Images into a dataframe

In [None]:
# Gather the .jpg files found in the output directory, one DataFrame row per file.
partition_glob = os.path.join(out_dir, "*.jpg")
partitioned_images = glob.glob(partition_glob)
image_df = pd.DataFrame(data={"image_path": partitioned_images})
image_df.head()

## 2.2 Use OCR software to convert each image to text

In [None]:
# Read the text of every partitioned image; read_image is given the image path and
# the Tesseract location configured in params.
image_df["image_text_raw"] = image_df["image_path"].apply(
    lambda path: read_image(path, params["TESSERACT_PATH"])
)
# Swap "|" for "I" (presumably a common OCR misread of a capital I).
# regex=False forces a literal match: read as a regular expression, "|" is an
# alternation of two empty patterns, and the default of `regex` differs between
# pandas versions.
image_df["image_text_raw"] = image_df["image_text_raw"].str.replace("|", "I", regex=False)
image_df.head()

### 2.2.1 Get rid of cases that don't have three lines
* Lines should be Name, Artist, Duration
* If tesseract misread the image, get rid of it now, deal with these cases manually

In [None]:
#Check if the detected string has three lines (2 newline characters)
image_df["properly_read"] = image_df["image_text_raw"].apply(lambda x: threeline_format(x))

#Fork off improperly read cases into a different dataframe
failed_row_df = image_df.loc[~image_df["properly_read"]]
#Keep only the properly read cases
image_df = image_df.loc[image_df["properly_read"]]

## 2.3 Get the song name and artist from the extracted text

In [None]:
# Split the stripped OCR text into lines once; line 1 is taken as the song name
# and line 2 as the artist.
text_lines = image_df["image_text_raw"].apply(lambda raw: raw.strip().split("\n"))
image_df["name"] = text_lines.apply(lambda lines: lines[0])
image_df["artist"] = text_lines.apply(lambda lines: lines[1])

image_df.head()

# 3 Add Songs to Spotify

## 3.1 Set Up Spotify API and search for the songs

In [None]:
# Request a Spotify token using the client id, client secret and redirect URI from params.
token = get_token(
    params["SPOTIFY_CLIENT_ID"],
    params["SPOTIFY_CLIENT_SECRET"],
    redirect_uri=params["SPOTIFY_REDIRECT_URI"],
)

# Report only whether a token came back: the token itself is a credential and
# should not be written into saved cell output.
print("token received:", token is not None)
print("Querying Spotify for all songs...")
# One search per row, using the extracted song name and artist; whatever search_song
# returns is stored unchanged in the "spotify_top_hit" column.
image_df["spotify_top_hit"] = image_df.apply(
    lambda row: search_song(row["name"], row["artist"], token), axis=1
)


### 3.1.1 Filter out any bad responses

In [None]:
#Fork off all the cases where a  None response was returned (Failed search)
failed_row_df = failed_row_df.append(image_df.loc[image_df.spotify_top_hit.isna()])
image_df = image_df.loc[(~image_df.spotify_top_hit.isna())]

In [None]:
# Number of rows that still have a Spotify search result.
identified_count = len(image_df)
print(f"Successfully Identified {identified_count} songs on Spotify")

## 3.2 Extract the Spotify ID and Name from the top hit

In [None]:
#Extract ids and name from the spotify hit
image_df["spotify_id"] = image_df["spotify_top_hit"].apply(lambda x: x["id"])
image_df["spotify_name"] = image_df["spotify_top_hit"].apply(lambda x: x["name"])
image_df["spotify_artist"] = image_df["spotify_top_hit"].apply(lambda x: x["artists"][0]["name"])

image_df.sample(5)

## Verify that Top hit is similar to extracted text with fuzzy text matching

In [None]:
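# TODO: compare name/artist with spotify_name/spotify_artist using thefuzz (fuzz is imported above) and flag low-scoring matches for manual review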

## 3.3 Add Songs to spotify liked songs

In [None]:
# Only the Spotify id is needed, so walk that column directly, one call per song.
for spotify_id in image_df["spotify_id"]:
    add_to_liked_songs(spotify_id)

# 4 Dump data to CSV

In [None]:
# Save the processed songs and the rows set aside as failures, each to its own
# CSV file in the working directory.
image_df.to_csv(path_or_buf="image_df.csv")
failed_row_df.to_csv(path_or_buf="failed_row_df.csv")