In [2]:
#Import library
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import time
import requests
import os
import csv


from typing import Final
from IPython.display import display
from pathlib import Path
from plotly import subplots
from plotly.subplots import make_subplots




In [3]:
# Parse the Webtoon Originals dataset from disk a single time. CSV is the
# master copy: the analysis cells below clone it with CSV.copy() and never
# modify it directly.
CSV: Final = pd.read_csv("data/webtoon_originals_all_languages.csv", low_memory=False)
# Independent working copy under the name `df` (a copy, so editing it cannot alter CSV).
df = CSV.copy()

### Analyzing the Data Set

In [4]:

# Column labels on the first line of output, number of rows on the second.
print(CSV.columns, len(CSV.index), sep="\n")

Index(['title_id', 'title', 'genre', 'authors', 'weekdays', 'length',
       'subscribers', 'rating', 'views', 'likes', 'status', 'daily_pass',
       'synopsis', 'language'],
      dtype='object')
6268


In [5]:

data = CSV.copy()


def _first_value_type(series):
    """Return str(type(v)) of the first non-null value v, or "None" if there is none."""
    non_null = series.dropna()
    if non_null.empty:
        return "None"
    return str(type(non_null.iloc[0]))


# One table row per dataset column: its label and the type of a sample value.
table_data = {
    "Column": data.columns,
    "Data Type": [_first_value_type(data[col]) for col in data.columns],
}

# The figure height grows with the number of columns being listed.
row_height = 30
width = 800
height = 100 + len(data.columns) * row_height

# Render the two lists as a Plotly table
fig = go.Figure(
    data=[
        go.Table(
            header={
                "values": ["Column", "Data Type"],
                "fill_color": "paleturquoise",
                "align": "left",
            },
            cells={
                "values": [table_data["Column"], table_data["Data Type"]],
                "fill_color": "lavender",
                "align": "left",
            },
        )
    ]
)
fig.update_layout(width=width, height=height)
fig.show()

In [6]:

data = CSV.copy()

# Pairwise correlations between the hand-picked metric columns.
# Alternative kept for reference (every numeric column at once):
#correlation_matrix = data.select_dtypes(include=[np.number]).corr()
selected_columns = ['likes', 'views', 'rating', 'subscribers', 'length']
correlation_matrix = data.loc[:, selected_columns].corr()

# Boolean mask that is True on and above the diagonal. Cells where the mask is
# False (the lower triangle) are replaced by NaN by .where().
# To keep the lower triangle instead, build the mask with np.tril:
#mask = np.tril(np.ones_like(correlation_matrix, dtype=bool))
mask = np.triu(np.ones(correlation_matrix.shape, dtype=bool))
correlation_matrix = correlation_matrix.where(mask)

# Heatmap trace; the color range is pinned to [-1, 1]
heatmap_trace = go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.columns,
    colorscale='RdBu',  # divergent color scale
    zmin=-1,
    zmax=1,
    colorbar={"title": "Correlation Coefficient"},
)
fig = go.Figure(data=heatmap_trace)

# Fixed-size square figure without title or axis captions
fig.update_layout(
    title="",
    xaxis={"title": ""},
    yaxis={"title": ""},
    autosize=False,
    width=800,
    height=800,
)

# Display the heatmap
fig.show()


In [7]:

# Endpoint and query for the episode list of one title (titleNo 300138, English).
url = "https://webtoon.p.rapidapi.com/canvas/episodes/list"

querystring = {"startIndex":"0","titleNo":"300138","language":"en","pageSize":"200"}

# The RapidAPI key is a secret, so it is read from the RAPIDAPI_KEY environment
# variable instead of being written into the notebook. A missing variable
# raises KeyError here, before any request is sent.
# NOTE(review): the key that was previously committed in this cell should be
# revoked and reissued.
headers = {
    "x-rapidapi-key": os.environ["RAPIDAPI_KEY"],
    "x-rapidapi-host": "webtoon.p.rapidapi.com"
}

# requests waits indefinitely by default; the timeout keeps a stalled
# connection from hanging the cell.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

print(response.json())

{'message': {'type': 'response', 'service': 'com.naver.webtoon', 'version': '0.0.1', 'result': {'episodeList': {'totalServiceEpisodeCount': 158, 'count': 20, 'now': 1732785865471, 'episode': [{'titleNo': 300138, 'episodeNo': 159, 'episodeSeq': 158, 'episodeTitle': "The Flares' help", 'thumbnailImageUrl': '/20241116_75/1731683288570cMPVi_PNG/04871e88-3780-4b38-ac5e-d7407cb9799a14743750677567316850.png?type=q70', 'exposureYmdt': 1731697201000, 'registerYmdt': 1731697201000, 'modifyYmdt': 1731697200000, 'readCount': 158926, 'likeCount': 24752, 'likeit': False, 'serviceStatus': 'SERVICE', 'exposureType': 'FREE'}, {'titleNo': 300138, 'episodeNo': 158, 'episodeSeq': 157, 'episodeTitle': 'Unchained', 'thumbnailImageUrl': '/20241028_183/1730111275207iwfeS_PNG/fd273c02-4df0-491c-ad2d-fb3bcec060121235411506953725558.png?type=q70', 'exposureYmdt': 1730487601000, 'registerYmdt': 1730487601000, 'modifyYmdt': 1730487600000, 'readCount': 194432, 'likeCount': 28443, 'likeit': False, 'serviceStatus': '

In [8]:

data = CSV.copy()

# Keep the titles with at least 1000 likes and add the inverted rating
# (10 - rating) as `new_rating`. This is done as one vectorized selection
# rather than a Python-level iterrows() loop: column dtypes are preserved, the
# derived column is computed only for the rows that are kept, and the four
# columns exist even when no row passes the filter.
filtered_data = (
    data.loc[data['likes'] >= 1000, ['title', 'rating', 'likes']]
    .assign(new_rating=lambda frame: 10 - frame['rating'])
    .reset_index(drop=True)
)

# Create a scatter plot
fig = px.scatter(filtered_data, x='likes', y='rating', 
                 title='Correlation Between Likes and Rating',
                 labels={'likes': 'Likes', 'rating': 'Rating'},
                 hover_data=['title'])

# Update layout for better readability (likes are drawn on a log-scaled axis)
fig.update_layout(
    xaxis=dict(title='Likes (Log Scale)', type='log'),
    yaxis=dict(title='Rating')
)

# Customize marker size and transparency
fig.update_traces(marker=dict(size=6, opacity=0.6))  # Smaller size, transparent points

# Show the plot
fig.show()


In [9]:

data = CSV.copy()

# Histogram of the `rating` column, capped at 20 bins
fig = px.histogram(
    data,
    x='rating',
    nbins=20,
    title='Distribution of Ratings',
    labels={'rating': 'Rating'},
)

# Axis captions and a fixed figure size
fig.update_layout(
    xaxis_title='Rating',
    yaxis_title='Frequency',
    width=800,
    height=500,
)

# Show the plot
fig.show()

In [10]:

data = CSV.copy()

# Split `rating` into five quantile-based bins labelled 1 to 5
quintile_labels = list(range(1, 6))
data['quintile'] = pd.qcut(data['rating'], q=5, labels=quintile_labels)

# One box per quintile, each showing the spread of ratings inside that bin
fig = px.box(
    data,
    x='quintile',
    y='rating',
    title='',
    labels={'quintile': 'Quintile', 'rating': 'Rating'},
    color='quintile',  # one color per quintile
)

# Axis captions and a fixed figure size
fig.update_layout(
    xaxis_title='Quintile',
    yaxis_title='Rating',
    width=800,
    height=500,
)

# Show the plot
fig.show()


In [11]:
# Work on the three audience metrics only. log1p compresses their range and
# maps a count of zero to 0 instead of -inf.
data = CSV[['subscribers', 'views', 'likes']].apply(np.log1p)

# Create a subplot grid (2 rows, 2 columns); the fourth cell stays empty
fig = make_subplots(rows=2, cols=2, 
                    subplot_titles=[
                        "Likes vs Subscribers", 
                        "Views vs Subscribers",
                        "Views vs Likes"
                    ])

# One entry per panel:
# (row, col, x column, y column, trace name, x-axis title, y-axis title)
panels = [
    (1, 1, 'likes', 'subscribers', 'Likes vs Subscribers', "Likes (log)", "Subscribers (log)"),
    (1, 2, 'views', 'subscribers', 'Views vs Subscribers', "Views (log)", ""),
    (2, 1, 'likes', 'views', 'Views vs Likes (Flipped Axes)', "Likes (log)", "Views (log)"),  # Flipped
]

for panel_row, panel_col, x_col, y_col, trace_name, x_title, y_title in panels:
    fig.add_trace(
        go.Scatter(x=data[x_col], y=data[y_col], mode='markers', name=trace_name),
        row=panel_row, col=panel_col
    )
    # The values are already log1p-transformed, so the axes stay linear.
    # Combining the transform with type='log' axes would plot the logarithm of
    # a logarithm and could not place the zeros produced by log1p(0).
    fig.update_xaxes(title_text=x_title, row=panel_row, col=panel_col)
    fig.update_yaxes(title_text=y_title, row=panel_row, col=panel_col)

# Update layout for better readability
fig.update_layout(
    title_text="",
    width=1000,  # Adjusted for 2x2 grid
    height=800,
    showlegend=False
)

fig.update_traces(marker=dict(size=6, opacity=0.6))  # Smaller size, transparent points

# Show the plot
fig.show()

In [71]:

data = CSV.copy()

# Language codes found in the dataset and the display names used on the charts
language_mapping = {
    'de': 'German',
    'en': 'English',
    'es': 'Spanish',
    'fr': 'French',
    'id': 'Indonesian',
    'th': 'Thai',
    'zh-hant': 'Traditional Chinese',
}

# Swap the codes for display names (values missing from the mapping stay as they are)
data['language'] = data['language'].replace(language_mapping)

# Per-language totals plus the number of rows in each language
language_stats = data.groupby('language').agg(
    total_likes=('likes', 'sum'),
    total_views=('views', 'sum'),
    total_subscribers=('subscribers', 'sum'),
    total_entries=('language', 'count'),
)

# Engagement ratios relative to the total views of each language
views_total = language_stats['total_views']
language_stats['likes_to_views_ratio'] = language_stats['total_likes'].div(views_total)
language_stats['subscribers_to_views_ratio'] = language_stats['total_subscribers'].div(views_total)

# Turn the `language` index back into a regular column for plotting
language_stats_df = language_stats.reset_index()

# One row of three bar charts
fig = make_subplots(
    rows=1,
    cols=3,
    horizontal_spacing=0.15,  # gap between neighbouring charts
    subplot_titles=["Likes-to-Views Ratio", "Subscribers-to-Views Ratio", "Number of Entries per Language"],
)

# (metric column, trace name / y-axis title, format bar labels with two decimals?)
bar_specs = [
    ('likes_to_views_ratio', 'Likes-to-Views Ratio', True),
    ('subscribers_to_views_ratio', 'Subscribers-to-Views Ratio', True),
    ('total_entries', 'Number of Entries', False),
]

for col_no, (metric, label, two_decimals) in enumerate(bar_specs, start=1):
    values = language_stats_df[metric]
    # Ratios are labelled with two decimals; entry counts are shown as they are
    bar_text = [f"{val:.2f}" for val in values] if two_decimals else values
    fig.add_trace(
        go.Bar(
            x=language_stats_df['language'],
            y=values,
            name=label,
            text=bar_text,
            textposition='auto',
        ),
        row=1, col=col_no,
    )
    fig.update_xaxes(title_text="Language", row=1, col=col_no)
    fig.update_yaxes(title_text=label, row=1, col=col_no)

# Overall title and size; the legend is hidden
fig.update_layout(
    title_text="Language Metrics Analysis",
    width=1200,
    height=500,
    showlegend=False,
)

# Show the plot
fig.show()


In [12]:
import requests
import pandas as pd

# Define the API endpoint and your API key
API_URL = "https://webtoon.p.rapidapi.com/canvas/list"
# The RapidAPI key is a secret, so it is read from the RAPIDAPI_KEY environment
# variable instead of being written into the notebook. A missing variable
# raises KeyError here, before any request is sent.
# NOTE(review): the key that was previously committed in this cell should be
# revoked and reissued.
HEADERS = {
    "X-RapidAPI-Key": os.environ["RAPIDAPI_KEY"],
    "X-RapidAPI-Host": "webtoon.p.rapidapi.com"
}

def fetch_webtoon_canvas_info(timeout=30):
    """Request the Webtoon Canvas listing from API_URL.

    Sends a GET request with HEADERS and a fixed set of query parameters.
    Failures are reported with print() instead of being raised.

    Args:
        timeout: Seconds to wait for the server before the request is
            abandoned. Without it, requests waits indefinitely.

    Returns:
        The decoded JSON body when the response status is 200. None when the
        request fails, the status is not 200, or the body is not valid JSON.
    """
    # Example parameters, adjust as needed based on API documentation
    query_params = {
        "category": "canvas",
        "limit": 100  # Adjust the limit as per API documentation
    }

    try:
        response = requests.get(API_URL, headers=HEADERS, params=query_params, timeout=timeout)
    except requests.RequestException as e:
        # Connection problems, timeouts, invalid URLs, ...
        print(f"An error occurred: {e}")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch data, status code {response.status_code}")
        # An error body is not necessarily JSON; show the raw text in that
        # case so the status report above is not replaced by a decode error.
        try:
            print(response.json())
        except ValueError:
            print(response.text)
        return None

    try:
        data = response.json()
    except ValueError as e:
        # A 200 response whose body cannot be decoded as JSON
        print(f"An error occurred: {e}")
        return None

    print("Successfully fetched Webtoon Canvas info.")
    return data

def save_to_csv(canvas_data, output_file="output.csv"):
    """Write `canvas_data` to `output_file` as CSV without the index column.

    `canvas_data` is handed to pd.DataFrame as-is. Any exception raised while
    building the frame or writing the file is caught and reported with
    print(); the function returns None either way.
    """
    try:
        # Build the frame and write it out in a single step
        pd.DataFrame(canvas_data).to_csv(output_file, index=False)
        print(f"Data successfully saved to {output_file}")
    except Exception as error:
        print(f"Error saving data to CSV: {error}")

def main():
    """Fetch the Canvas listing and save selected fields of each entry to CSV.

    Prints "No data to save." and stops when the fetch returns nothing or the
    payload has no 'results' key.
    """
    canvas_info = fetch_webtoon_canvas_info()
    # NOTE(review): this assumes the payload exposes its entries under a
    # top-level 'results' key - confirm against a real response.
    if not canvas_info or 'results' not in canvas_info:
        print("No data to save.")
        return

    # CSV column name -> key looked up in each entry (missing keys become None)
    field_map = {
        "Title": "title",
        "Author": "author",
        "Description": "description",
        "URL": "url",
        "Genre": "genre",
        "Language": "language",
        "Views": "views",
        "Likes": "likes",
        "Subscribers": "subscribers",
    }
    canvas_data = [
        {column: canvas.get(key) for column, key in field_map.items()}
        for canvas in canvas_info.get('results', [])
    ]

    # Save the data to a CSV file
    save_to_csv(canvas_data)

#if __name__ == "__main__":
#    main()
