In [18]:
import feedparser
import numpy as np
import pandas as pd
import hashlib
from enum import Enum
from PyQt5 import QtWidgets, QtGui, QtCore
import pandas as pd

In [19]:
bbc_rss = ['http://feeds.bbci.co.uk/news/rss.xml', 
           'http://feeds.bbci.co.uk/news/world/rss.xml', 
           'http://feeds.bbci.co.uk/news/uk/rss.xml', 
           'http://feeds.bbci.co.uk/news/business/rss.xml', 
           'http://feeds.bbci.co.uk/news/politics/rss.xml', 
           'http://feeds.bbci.co.uk/news/health/rss.xml', 
           'http://feeds.bbci.co.uk/news/education/rss.xml', 
           'http://feeds.bbci.co.uk/news/science_and_environment/rss.xml', 
           'http://feeds.bbci.co.uk/news/technology/rss.xml', 
           'http://feeds.bbci.co.uk/news/entertainment_and_arts/rss.xml']

In [20]:
class RSS_Feeds:
    
    def __init__(self, urls):
        self.urls = urls
        self.feeds = self.get_feeds()
        self.df_news = self.create_df()
        self.df_unique_news = self.create_unique()
        
    def get_feeds(self):
        return [feedparser.parse(feed) for feed in self.urls]
    
    def get_category(self, feed):
        # sources may have different category names - agg categories?
        return feed.feed.get('title', '')

    def get_title_summary(self, feed, sep='. '): #get and join title and summary for each entry in feed
        titles = [entry['title'] for entry in feed['entries']]
        summaries = [entry['summary'] for entry in feed['entries']]
        title_summary = [entry['title'] + sep + entry['summary'] for entry in feed['entries']]
        return titles, summaries, title_summary
    
    def get_date(self, feed): #(year, month, day) for each entry in feed
        return([entry['published_parsed'][:3] for entry in feed['entries']])
    
    def get_time(self, feed): #(hour, min, sec) for each entry in feed
        return([entry['published_parsed'][3:6] for entry in feed['entries']])
    
    def get_datetime_nparsed(self, feed): #not parsed date and time for each entry in feed
        return([entry['published'] for entry in feed['entries']])
    
    def get_link(self, feed): # link for each entry in feed
        return([entry['link'] for entry in feed['entries']])
    
    def str2hash(self, s):
        return hashlib.md5(s.encode()).hexdigest()
    
    def create_df(self): 
        news, title, summary, category, pdate, ptime, fdatetime, links  = [], [], [], [], [], [], [], []
        for feed in self.feeds:
            cat = self.get_category(feed)
            titles, summaries, texts = self.get_title_summary(feed)
            d_ymd, t_hms = self.get_date(feed), self.get_time(feed)
            fdt = self.get_datetime_nparsed(feed)
            news_links = self.get_link(feed)
            
            cat = np.resize([cat], len(texts))
            news.extend(texts)
            title.extend(titles)
            summary.extend(summaries)
            pdate.extend(d_ymd)
            ptime.extend(t_hms)
            fdatetime.extend(fdt)
            links.extend(news_links)
            category.extend(cat)
        df_news = pd.DataFrame({'news':news, 
                                'category':category,
                                'title':title, 
                                'summary':summary,
                                'link':links,
                                'date':pdate, 
                                'time':ptime, 
                                'datetime':fdatetime})
        df_news['ID'] = df_news.news.apply(self.str2hash)
        self.df_news = df_news
        return df_news
    
    def create_unique(self):
        df_unique_news = self.df_news.groupby('news').agg({'category':list, 
                                                           'title': np.unique, 
                                                           'summary': np.unique, 
                                                           'link': np.unique, 
                                                           'date': np.unique, 
                                                           'time': np.unique, 
                                                           'datetime': np.unique, 
                                                           'ID': np.unique})
        df_unique_news.reset_index(inplace=True)
        self.df_unique_news = df_unique_news
        return df_unique_news
    
    def get_unique_news(self):
        return self.df_unique_news.news.values

### Dummy-Communicator für die GUI Entwicklung


In [21]:
class Communicator:
    '''Application start -> initialize Communicator instance and call `start()`
    to start an existing system or create a new system and get DataFrames, e.g.:
    
        CI = Communicator()
        news_in_categories, interesting_news = CI.start()
        display(news_in_categories)
    
    Then use `handle_input()` to process user input and get system output, e.g.:
    
        news_in_categories, interesting_news = CI.handle_input(u_input='upd')
    
    
    '''
    ### COmmunicator instanz ins init vom news dialog inkludiern


    def __init__(self): # Application start -> initialize Communicator instance 
        # call `start` to start an existing system or create a new system
        self.RSS = RSS_Feeds(bbc_rss)
        #self.start()
    
    def create_new_system(self):
        # create a new system without any already existing data
        pass
    
    def load_data_models(self):
        pass
        
    def select_news(self):
        categories = self.RSS.df_news.groupby('category').head(10)
        interesting = self.RSS.df_unique_news.sample(n=np.random.randint(15, 25), random_state=42)
        return categories, interesting
    
    def start(self):
        #try to load data
        #if ok -> start existing
        #if smth is not ok -> call `create_new_system`
        # return DataFrames
        c, i = self.select_news()
        return c, i


    def handle_input(self, u_input): # the main method that GUI has to call
        ''' Call this method with `u_input` argument to communicate with the system.
            It takes a string `u_input` and returns an appropriate output.
            
            Interactions as `u_input` -> `method output`:
            
            * 'upd' -> two pandas DataFrame objects (news in categories, interesting news)
            * 'exit' -> boolean: True, if data and models have been successfully saved, False otherwise
            * 'viewed' + ' ' + ID (e.g. 'viewed 005503512f38f130303cb133d656203b') -> two pandas DataFrame objects (news in categories, interesting news)
            * 'similar' + ' ' + ID (e.g. 'similar dc313bbf1bfca18d28e95862e972822f') -> pandas DataFrame with nearest news            
        '''
        # parse GUI input
        # call appropriate methods
        # return system output to GUI
        instruction = u_input.split()[0]
        if instruction == 'upd':
            #update news
            self.RSS = RSS_Feeds(bbc_rss)
            c, i = self.select_news()
            return c, i
        elif instruction == 'exit':
            #save
            return True
        elif instruction == 'viewed':
            #get news id and process viewed...
            n_id = u_input.split()[1:] # list, even if it consists of only one ID (generally)
            c, i = self.select_news()
            return c, i
        elif instruction == 'similar':
            #get id and return similar
            n_id = u_input.split()[1] # string
            return self.RSS.df_unique_news.sample(n=5, random_state=111)
        else:
            return('unknown input')
        
    
    def update_news(self):
        # download news from rss feeds
        # return DataFrames
        pass
    
    def save(self):
        # save data and models
        # return ok or error
        pass
    
    def handle_viewed(self):
        # get news ID, check if already in viewed, remove from all news,.....
        # if n(viewed) >= THRESHOLD -> update model -> save mpdel
        # return DataFrames
        pass
    
    def find_similar(self, n):
        # get and return the n nearest news to the given news item
        pass

In [22]:
CI = Communicator()

In [23]:
from_categories, interesting = CI.start()

In [24]:
from_categories.head(3)

Unnamed: 0,news,category,title,summary,link,date,time,datetime,ID
0,New China virus: Officials warn it 'could muta...,BBC News - Home,New China virus: Officials warn it 'could muta...,"China has said that it is now at the ""most cri...",https://www.bbc.co.uk/news/world-asia-china-51...,"(2020, 1, 22)","(6, 13, 42)","Wed, 22 Jan 2020 06:13:42 GMT",bb28dc190ca008696ab44be155eecb0c
1,Trump impeachment: Senators clash over rules a...,BBC News - Home,Trump impeachment: Senators clash over rules a...,The Senate rejects Democratic attempts to obta...,https://www.bbc.co.uk/news/world-us-canada-511...,"(2020, 1, 22)","(5, 5, 56)","Wed, 22 Jan 2020 05:05:56 GMT",5a98860e1ec5e9558dda435fcf455073
2,Kate launches childhood survey to help under-f...,BBC News - Home,Kate launches childhood survey to help under-f...,The Duchess of Cambridge meets children in Bir...,https://www.bbc.co.uk/news/uk-51192909,"(2020, 1, 22)","(5, 35, 48)","Wed, 22 Jan 2020 05:35:48 GMT",08d8bdb90bbd750a31365439d7c40046


In [25]:
c, i = CI.handle_input('upd')

In [26]:
c, i = CI.handle_input('viewed 51f007e7fca0248885436004338df61b')

In [27]:
s = CI.handle_input('similar 51f007e7fca0248885436004338df61b')

In [28]:
CI.handle_input('exit')

True

In [29]:
help(Communicator)

Help on class Communicator in module __main__:

class Communicator(builtins.object)
 |  Application start -> initialize Communicator instance and call `start()`
 |  to start an existing system or create a new system and get DataFrames, e.g.:
 |  
 |      CI = Communicator()
 |      news_in_categories, interesting_news = CI.start()
 |      display(news_in_categories)
 |  
 |  Then use `handle_input()` to process user input and get system output, e.g.:
 |  
 |      news_in_categories, interesting_news = CI.handle_input(u_input='upd')
 |  
 |  Methods defined here:
 |  
 |  __init__(self)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  create_new_system(self)
 |  
 |  find_similar(self, n)
 |  
 |  handle_input(self, u_input)
 |      Call this method with `u_input` argument to communicate with the system.
 |      It takes a string `u_input` and returns an appropriate output.
 |      
 |      Interactions as `u_input` -> `method output`:
 |      
 |      * '

In [30]:
help(Communicator.handle_input)

Help on function handle_input in module __main__:

handle_input(self, u_input)
    Call this method with `u_input` argument to communicate with the system.
    It takes a string `u_input` and returns an appropriate output.
    
    Interactions as `u_input` -> `method output`:
    
    * 'upd' -> two pandas DataFrame objects (news in categories, interesting news)
    * 'exit' -> boolean: True, if data and models have been successfully saved, False otherwise
    * 'viewed' + ' ' + ID (e.g. 'viewed 005503512f38f130303cb133d656203b') -> two pandas DataFrame objects (news in categories, interesting news)
    * 'similar' + ' ' + ID (e.g. 'similar dc313bbf1bfca18d28e95862e972822f') -> pandas DataFrame with nearest news



In [31]:
class NewsCategory():
  CAT = {
      0: 'Home',
      1: 'World',
      2: 'UK',
      3: 'Business',
      4: 'UK Politics',
      5: 'Health',
      6: 'Family & Education',
      7: 'Science & Environment',
      8: 'Technology',
      9: 'Entertainment & Arts',
      10: 'Interesting News'
      }

In [32]:
class NewsModel():
  # index comes from order in get_df_row
  COL_ID: int = 0
  COL_TITLE: int = 1
  COL_SUMMARY: int = 2
  COL_LINK: int = 3
  COL_DATE: int = 4

  _result_model: QtGui.QStandardItemModel = QtGui.QStandardItemModel()
  _selected_cat: int = 0

  def __init__(self):
   self._df: pd.DataFrame = pd.read_csv(r"C:\Users\kaszo\Documents\Dokumente Katalin Feichtinger\Data Science 1. Sem\Analysewerkzeuge\Analysewerkzeuge_Python_Team-master\data_example\from_rss_categories.csv")

  def model(self) -> QtGui.QStandardItemModel:
    return self._result_model

  def get_df_row(self, row) -> [QtGui.QStandardItem]:
      self._result_model.appendRow([ \
      QtGui.QStandardItem(row[0]),\
      QtGui.QStandardItem(row['title']),\
      QtGui.QStandardItem(row['summary']),\
      QtGui.QStandardItem(row['link']),\
      QtGui.QStandardItem(row['datetime'])\
      ]) 
      return
  
  def set_cat(self, category, gen_articles, pers_articles):
    if category == 10:
        df = pers_articles
    else: 
        self._selected_cat = 'BBC News - ' + NewsCategory.CAT[category]
        df = gen_articles.query('category==@self._selected_cat')
    
    self._result_model: QtGui.QStandardItemModel = QtGui.QStandardItemModel()
    for i, row in df.iterrows():
      #self.get_df_row(row[COL_ID], row[COL_TITLE], row[COL_SUMMARY], row[COL_LINK], row[COL_DATE], row[COL_CATEGORY]
      self.get_df_row(row)
    
    # alternatively, try via List Comprehension?
    #result = [self.get_df_row(row[COL_ID], row[COL_TITLE], row[COL_SUMMARY], row[COL_LINK], row[COL_DATE], row[COL_CATEGORY]) for row in self._df[['id', 'title', 'summary', 'link', 'datetime', 'category']].values]
    #result = [self.get_df_row(row[0], row[1], row[2], row[3], row[4], row[5]) for row in self._df[['id', 'title', 'summary', 'link', 'datetime', 'category']].values]


In [33]:
#class NewsDialog(QtWidgets.QDialog):
class NewsDialog(QtWidgets.QMainWindow):

  WGT_MAX_SIZE: QtCore.QSize = QtCore.QSize(180, 30)

  def __init__(self):

    self._communicator = Communicator()
    self.gen_articles, self.pers_articles = self._communicator.start()
    self._model = NewsModel()
    self._model.set_cat(0, self.gen_articles, self.pers_articles)
    
    # Init Category 'Home'
    # moved _cat_selected into NewsModel
    # self._cat_selected = "pers"

    #QtWidgets.QDialog.__init__(self)
    QtWidgets.QMainWindow.__init__(self)
 
    # Main Window
    available_geometry: QtCore.QRect = self.screen().availableGeometry()
    self.resize(available_geometry.width() / 2, available_geometry.height() / 2)
    self.move((available_geometry.width() - self.width()) / 2, (available_geometry.height() - self.height()) / 2)

    # Root Container
    self._box_root: QtWidgets.QBoxLayout = QtWidgets.QVBoxLayout()
    
    # Top Container in Root
    self._box_top: QtWidgets.QBoxLayout = QtWidgets.QHBoxLayout()
    self._box_root.addLayout(self._box_top)

    # Spacer in Top, full width. expand horizontally. results in widget Min Size for other widgets
    self._box_top.addSpacerItem(QtWidgets.QSpacerItem(self.width() * 1/2, 0, hPolicy = QtWidgets.QSizePolicy.MinimumExpanding, vPolicy = QtWidgets.QSizePolicy.Minimum))

    # Category Drop-Down in Top
    self._ctr_categories: QtWidgets.QComboBox = QtWidgets.QComboBox()
    self._ctr_categories.setMaximumSize(NewsDialog.WGT_MAX_SIZE)
    self._ctr_categories.setMinimumSize(NewsDialog.WGT_MAX_SIZE)
    self._box_top.addWidget(self._ctr_categories)

    # connect Drop-Down UI event to local function
    self._ctr_categories.currentIndexChanged.connect(self.cat_changed)

    # add Category values to dropdown
    for i in NewsCategory.CAT:
      self._ctr_categories.addItem(str(NewsCategory.CAT[i]), i)

    # Main List / Table widget in Root
    self._main_list: QtWidgets.QTableView = QtWidgets.QTableView()
    self._box_root.addWidget(self._main_list)

   
    # Hide MainList Headers
    self._h_header: QtWidgets.QHeaderView = self._main_list.horizontalHeader()
    self._h_header.hide()
    self._v_header: QtWidgets.QHeaderView = self._main_list.verticalHeader()
    self._v_header.hide()

    # Update List Values
    self.cat_changed()

    # Bottom Container in Root
    self._box_bottom: QtWidgets.QBoxLayout = QtWidgets.QHBoxLayout()
    self._box_root.addLayout(self._box_bottom)

    self._box_bottom.addSpacerItem(QtWidgets.QSpacerItem(self.width() * 1/2, 0, hPolicy = QtWidgets.QSizePolicy.MinimumExpanding, vPolicy = QtWidgets.QSizePolicy.Minimum))
    
    # Update Button in Bottom
    self._btn_update: QtWidgets.QPushButton = QtWidgets.QPushButton("Update")
    self._btn_update.setMaximumSize(NewsDialog.WGT_MAX_SIZE)
    self._btn_update.setMinimumSize(NewsDialog.WGT_MAX_SIZE)
    self._box_bottom.addWidget(self._btn_update)

    # Button event to local func
    self._btn_update.clicked.connect(self.update_clicked)
    
  
    #self.setLayout(self._box_root)
    # Root Widget for Root Container Layout
    self._wgt_root = QtWidgets.QWidget()
    self._wgt_root.setLayout(self._box_root)

    # Root Widget to QMainWindow
    self.setCentralWidget(self._wgt_root)

    self.show()
  
  def cat_changed(self):
    # check for changes in DD

    # Drop Down signal can be triggered before __init__
    # da self.main_list im _init_ zwar erzeugt ist, aber erst nach self._ctr_categories.currentIndexChanged.connect(self.cat_changed) und self._ctr_categories.addItem(str(NewsCategory.CAT[i]), i), wird eine Fehlermeldung ausgegeben. hasattr prüft, ob das darin genannte attribut am genannten objekt (self in dem fall) existiert, Fehlermeldung wird behoben
    if not hasattr(self, '_main_list'):
     return
    
    # Call to one specific DD-Control Widget
    # new_cat = self._ctr_categories.currentData()
    
    # Better use actual sender object
    wgt_sender: QtWidgets.QComboBox = self.sender()
    if wgt_sender is not None:
      new_cat = wgt_sender.currentData()
    else:
      new_cat = None

    if new_cat is not None:
      #self._cat_selected = new_cat
      self._model.set_cat(new_cat, self.gen_articles, self.pers_articles)

    # TODO: use correct Model, based on _cat_selected
    #selected_model: QtGui.QStandardItemModel = self._model.model(self._cat_selected)

    # model now always selected!
    selected_model = self._model.model()
    self._main_list.setModel(selected_model)

    # loop over selected Model
    for i in range(selected_model.rowCount()):

      # get relevant Cell indices (QStandardModelItem.index())
      cur_item_ID_ix: QtCore.QModelIndex = selected_model.index(i, NewsModel.COL_ID)
      cur_item_title_ix: QtCore.QModelIndex = selected_model.index(i, NewsModel.COL_TITLE)
      #cur_item_title = self._model.model(self._cat_selected).index(i, 0)
      cur_item_summary_ix: QtCore.QModelIndex = selected_model.index(i, NewsModel.COL_SUMMARY)
      cur_item_link_ix: QtCore.QModelIndex = selected_model.index(i, NewsModel.COL_LINK)
      cur_item_date_ix: QtCore.QModelIndex = selected_model.index(i, NewsModel.COL_DATE)

      # root widget for current line
      cur_wdgt: QtWidgets.QWidget = QtWidgets.QWidget()

      # Root Container for Root Widget
      # Vertical
      cur_line: QtWidgets.QVBoxLayout = QtWidgets.QVBoxLayout()
      cur_wdgt.setLayout(cur_line)

      # Top Container, header & date?
      # horizontal
      cur_line_top: QtWidgets.QHBoxLayout = QtWidgets.QHBoxLayout()

      # Title to Top
      # from model Data via cell index
      cur_title_str = selected_model.data(cur_item_title_ix)
      cur_title: QtWidgets.QLabel = QtWidgets.QLabel(cur_title_str)
      cur_line_top.addWidget(cur_title)

      # spacer to top, so Date is aligned right
      cur_line_top.addSpacerItem(QtWidgets.QSpacerItem(0, 0, hPolicy = QtWidgets.QSizePolicy.MinimumExpanding, vPolicy = QtWidgets.QSizePolicy.Minimum))

      # Date to Top
      # via cell index
      cur_date_str = selected_model.data(cur_item_date_ix)
      cur_date: QtWidgets.QLabel = QtWidgets.QLabel(cur_date_str)
      cur_line_top.addWidget(cur_date)


      # add summary to 'title' column, in VBoxlayout (cur_line)
      cur_summary_str = selected_model.data(cur_item_summary_ix)
      cur_summary: QtWidgets.QLabel = QtWidgets.QLabel(cur_summary_str)


      # add Top, Summary to vert. Root Container
      cur_line.addLayout(cur_line_top)
      cur_line.addWidget(cur_summary)

      # set new Root Widget on Position of Title Column via cell index
      self._main_list.setIndexWidget(cur_item_title_ix, cur_wdgt)

      # overwrite title column text with new Root Widget
      cur_wdgt.setAutoFillBackground(True)
      

      # Second Column
      # set Button Widget (+ container) in Place of Link Col. via cell index
      cur_btn_layout: QtWidgets.QBoxLayout = QtWidgets.QBoxLayout(QtWidgets.QBoxLayout.TopToBottom)
      cur_btn_wgt: QtWidgets.QWidget = QtWidgets.QWidget()
      cur_btn_wgt.setAutoFillBackground(True)
      cur_btn_wgt.setLayout(cur_btn_layout)

      cur_btn: QtWidgets.QPushButton = QtWidgets.QPushButton('Link')
      cur_btn.setMaximumSize(NewsDialog.WGT_MAX_SIZE)
      #cur_btn.setMinimumSize(NewsDialog.WGT_MAX_SIZE)
      cur_btn_layout.addWidget(cur_btn, QtCore.Qt.AlignCenter)

      cur_btn.clicked.connect(self.link_clicked)
      #self._main_list.clicked.connect(self.link_clicked)
      self._main_list.setIndexWidget(cur_item_link_ix, cur_btn_wgt)


    # statically hide columns from model
    self._main_list.setColumnHidden(NewsModel.COL_ID, True)
    self._main_list.setColumnHidden(NewsModel.COL_SUMMARY, True)
    self._main_list.setColumnHidden(NewsModel.COL_DATE, True)
    self._main_list.resizeRowsToContents()

    # moved from __init__
    if selected_model.rowCount() > 0:
      self._h_header.setSectionResizeMode(1, QtWidgets.QHeaderView.Stretch)

  #def resizeEvent(self, old_size: QtCore.QSize, new_size: QtCore.QSize):
  def resizeEvent(self, event: QtGui.QResizeEvent):
    self._main_list.resizeRowsToContents()

  def update_clicked(self, gen_articles, pers_articles):
    # TODO: call update function of DataFrame
    # TODO: call cat_changed for Table Refresh
    # hier wäre die ID: cur_item_ID_ix: QtCore.QModelIndex = selected_model.index(i, NewsModel.COL_ID), hab die Verknüpfung nicht ordentlich hinbekommen..
    ### ID bekommen und self._communicator.handle_input('upd '), 2 dataframes werden retourniert,speichern, anzeigen
    
    msgbox: QtWidgets.QMessageBox = QtWidgets.QMessageBox()
    msgbox.setText("update clicked")
    msgbox.exec()

  def link_clicked(self):
    wgt_sender: QtWidgets.QWidget = self.sender().parent()
    #row = self._main_list.indexAt(table.pos()).row()
    link_ix: QtCore.QModelIndex = self._main_list.indexAt(wgt_sender.pos())
    linktext = self._model.model().data(link_ix)
  
    ### ID bekommen und self._communicator.handle_input('viewed '+ ID )
    ### die 2 dateframes werden ausgegeben, diese sollen self.gen_articles und pers_articles => erneuerte Artikel erneut angezeigt werden! Kategorie,die zum letztem Mal geöffnet wurde,
    #linktext = selected_model.itemFromIndex(link_ix).text()
    #linktext = str(self._main_list.indexAt(wgt_sender.pos()).row())
    QtGui.QDesktopServices.openUrl(QtCore.QUrl(linktext))

    # exit (fenster schlissen) bearbeiten => handle_input('exit')
    self.exit_button.clicked.connect(self.quit, parent=self)


In [34]:
if __name__ == "__main__":
  QAPP = QtWidgets.QApplication([])
  DIALOG = NewsDialog()
  QAPP.exec_()