# In [None]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from wordcloud import WordCloud, STOPWORDS
from PyQt5.QtWidgets import QApplication, QMainWindow, QPushButton, QFileDialog, QLabel, QVBoxLayout, QWidget

class FileUploadApp(QMainWindow):
    """Main window: pick a CSV and a JSON file, then analyse and plot them.

    The CSV is read as a table of videos and the JSON as a category listing
    whose ``items`` entries carry ``id`` and ``snippet.title``.  Once both
    files have been chosen, "Generate Output" prints the correlation of
    ``views`` with the other numeric columns and shows box plots per category
    and word clouds for the text columns.
    """

    # Columns rescaled to the [0, 1] range before correlation and plotting.
    # NOTE(review): 'dislikes_per_view' is computed but was never in this
    # list in the original code; left unchanged -- confirm whether intended.
    _SCALED_COLUMNS = [
        'views', 'likes', 'dislikes', 'comment_count', 'likes_per_view',
        'comments_per_view', 'total_likes_dislikes_per_view', 'likes_percentage',
        'dislikes_percentage', 'publish_hour', 'title_characters', 'title_words',
        'description_characters', 'description_words', 'total_likes_dislikes',
    ]

    # Ratio columns that can contain +/-inf when the denominator is zero.
    _RATIO_COLUMNS = [
        'likes_per_view', 'dislikes_per_view', 'comments_per_view',
        'total_likes_dislikes_per_view', 'likes_percentage', 'dislikes_percentage',
    ]

    def __init__(self):
        """Build the window: two upload buttons, two path labels, one run button."""
        super().__init__()
        self.setWindowTitle("File Upload Example")
        self.setGeometry(100, 100, 600, 400)

        self.upload_csv_button = QPushButton("Upload CSV", self)
        self.upload_csv_button.clicked.connect(self.upload_csv)

        self.upload_json_button = QPushButton("Upload JSON", self)
        self.upload_json_button.clicked.connect(self.upload_json)

        # Stays disabled until both input files have been selected.
        self.generate_output_button = QPushButton("Generate Output", self)
        self.generate_output_button.clicked.connect(self.generate_output)
        self.generate_output_button.setEnabled(False)

        self.csv_label = QLabel("CSV File: ")
        self.json_label = QLabel("JSON File: ")

        layout = QVBoxLayout()
        for widget in (
            self.upload_csv_button,
            self.upload_json_button,
            self.csv_label,
            self.json_label,
            self.generate_output_button,
        ):
            layout.addWidget(widget)

        self.central_widget = QWidget()
        self.central_widget.setLayout(layout)
        self.setCentralWidget(self.central_widget)

        # Paths of the chosen files; None until the user picks one.
        self.csv_path = None
        self.json_path = None

    def _choose_file(self, caption, name_filter):
        """Show an open-file dialog and return the chosen path ('' if cancelled)."""
        options = QFileDialog.Options()
        file_path, _ = QFileDialog.getOpenFileName(self, caption, "", name_filter, options=options)
        return file_path

    def upload_csv(self):
        """Ask for a CSV file and remember its path if one was chosen."""
        file_path = self._choose_file("Upload CSV", "CSV Files (*.csv)")
        if file_path:
            self.csv_path = file_path
            self.csv_label.setText("CSV File: " + self.csv_path)
            self.check_enable_generate_output_button()

    def upload_json(self):
        """Ask for a JSON file and remember its path if one was chosen."""
        file_path = self._choose_file("Upload JSON", "JSON Files (*.json)")
        if file_path:
            self.json_path = file_path
            self.json_label.setText("JSON File: " + self.json_path)
            self.check_enable_generate_output_button()

    def check_enable_generate_output_button(self):
        """Enable the "Generate Output" button only when both paths are set."""
        both_selected = self.csv_path is not None and self.json_path is not None
        self.generate_output_button.setEnabled(both_selected)

    def generate_output(self):
        """Load both files, derive and normalise features, then print and plot.

        Any failure (unreadable file, missing column, no rows left after
        cleaning, ...) is reported in a message box instead of propagating
        out of the slot.
        """
        # Local import: QMessageBox is not in the file-level import list.
        from PyQt5.QtWidgets import QMessageBox

        try:
            videos = self._load_videos(self.csv_path, self.json_path)
            videos = self._add_derived_columns(videos)
            videos = self._normalize(videos)
            self._print_view_correlations(videos)
            self._plot_category_boxplots(videos)

            videos = videos.drop_duplicates(subset="title", keep="last")
            self._plot_word_clouds(videos)
        except Exception as exc:
            # Top-level GUI boundary: PyQt5 terminates the application when an
            # exception escapes a slot, so report it to the user instead.
            QMessageBox.critical(self, "Generate Output", f"Could not generate output:\n{exc}")

    @staticmethod
    def _load_videos(csv_path, json_path):
        """Read the CSV, attach a 'category' name column and drop incomplete rows."""
        videos = pd.read_csv(csv_path)
        category_data = pd.read_json(json_path)

        category_names = {item['id']: item['snippet']['title'] for item in category_data['items']}
        videos.insert(4, 'category', videos['category_id'].astype(str).map(category_names))
        # copy() so later column assignments act on an independent frame.
        return videos.dropna().copy()

    @classmethod
    def _add_derived_columns(cls, videos):
        """Add ratio, date/time and text-length columns; drop rows left incomplete."""
        videos["likes_per_view"] = videos["likes"] / videos["views"]
        videos["dislikes_per_view"] = videos["dislikes"] / videos["views"]
        videos["comments_per_view"] = videos["comment_count"] / videos["views"]
        videos["total_likes_dislikes"] = videos["likes"] + videos["dislikes"]
        videos["total_likes_dislikes_per_view"] = videos["total_likes_dislikes"] / videos["views"]
        videos["likes_percentage"] = videos["likes"] / videos["total_likes_dislikes"]
        videos["dislikes_percentage"] = videos["dislikes"] / videos["total_likes_dislikes"]

        videos['trending_date'] = pd.to_datetime(videos['trending_date'], format='%y.%d.%m').dt.date
        published = pd.to_datetime(videos['publish_time'], format='%Y-%m-%dT%H:%M:%S.%fZ')
        videos['publish_date'] = published.dt.date
        videos['publish_time'] = published.dt.time
        videos['publish_hour'] = published.dt.hour

        title_text = videos["title"].astype(str)
        description_text = videos["description"].astype(str)
        videos["title_characters"] = title_text.str.len()
        videos["title_words"] = title_text.str.split().str.len()
        videos["description_characters"] = description_text.str.len()
        videos["description_words"] = description_text.str.split().str.len()

        # x/0 yields +/-inf, which MinMaxScaler rejects; treat it like the
        # NaN produced by 0/0 so such rows are dropped rather than crashing.
        ratios = cls._RATIO_COLUMNS
        videos[ratios] = videos[ratios].replace([float("inf"), float("-inf")], float("nan"))
        return videos.dropna().copy()

    @classmethod
    def _normalize(cls, videos):
        """Min-max scale the numeric feature columns in place and return the frame."""
        columns = cls._SCALED_COLUMNS
        scaler = preprocessing.MinMaxScaler()
        scaled = scaler.fit_transform(videos[columns].to_numpy(dtype='float32'))
        # Assign the raw array (positional), not a new DataFrame: a fresh
        # DataFrame has a 0..n-1 index and would be aligned against the
        # gappy post-dropna index, moving values to the wrong rows.
        videos[columns] = scaled
        return videos

    @staticmethod
    def _print_view_correlations(videos):
        """Print the correlation of 'views' with every numeric column, descending."""
        numeric_videos = videos.select_dtypes(include=['number'])
        corr_with_views = numeric_videos.corr()["views"].sort_values(ascending=False)
        print("Correlation of views with other variables:")
        print(corr_with_views)
        print("\n")

    @staticmethod
    def _plot_category_boxplots(videos):
        """Show three stacked box plots of engagement metrics grouped by category."""
        panels = (
            (311, 'views', 'Plot of Views by Category'),
            (312, 'comment_count', 'Plot of Comment Count by Category'),
            (313, 'total_likes_dislikes', 'Plot of Total Likes and Dislikes by Category'),
        )
        plt.figure(figsize=(12, 18))
        for position, metric, heading in panels:
            plt.subplot(position)
            axes = sns.boxplot(x='category', y=metric, data=videos)
            axes.set_xticklabels(axes.get_xticklabels(), rotation=45)
            plt.title(heading)
        plt.subplots_adjust(wspace=0.2, hspace=0.5, top=0.9)
        plt.show()
        print("\n")

    @staticmethod
    def _plot_word_clouds(videos):
        """Show a 2x2 grid of word clouds for the text columns."""
        panels = (
            (221, "title", 'Word Cloud for Video Titles'),
            (222, "description", 'Word Cloud for Video Descriptions'),
            (223, "channel_title", 'Word Cloud for Channel Titles'),
            (224, "tags", 'Word Cloud for Video Tags'),
        )
        plt.figure(figsize=(8, 8))
        for position, column, heading in panels:
            plt.subplot(position)
            # Join the real cell values; str(Series) would only give the
            # truncated repr (index numbers, '...', 'Name: ..., dtype: ...').
            text = " ".join(videos[column].astype(str))
            cloud = WordCloud(width=800, height=800, background_color="white",
                              stopwords=set(STOPWORDS), min_font_size=10).generate(text)
            plt.imshow(cloud, interpolation='bilinear')
            plt.title(heading)
            plt.axis("off")
            plt.tight_layout(pad=1)
        plt.show()
        print("\n")


if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main window and
    # hand the event loop's return code back to the shell as the exit status.
    qt_app = QApplication(sys.argv)
    main_window = FileUploadApp()
    main_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
