In [66]:
import pandas as pd
import re
import numpy as np

In [202]:
# Load the raw dataset. The path is relative to the notebook's working
# directory. Preproc(df) later mutates this frame in place, so re-run this
# cell to get the untouched data back.
df = pd.read_csv('main_task.csv')

In [201]:
class Preproc():
    """Preprocess the restaurant DataFrame in place.

    Creating an instance immediately runs every preprocessing step on
    ``data``; the frame is mutated (not copied) and kept as ``self.data``.

    Columns written:
        Cuisine Style: the raw ``"['A', 'B']"`` text with the square brackets
            removed and split on commas into a list; missing values stay NaN.
            Items are not trimmed, so they keep the quotes and leading spaces
            of the raw text.
        number_of_cuisine_types: number of items in ``Cuisine Style``
            (1 when the cuisine is missing).
        Reviews Date: list of all ``MM/DD/YYYY`` substrings found in
            ``Reviews`` (NaN when ``Reviews`` is not a string).
        Last Review: latest valid date in ``Reviews Date`` (NaT if none).
        Second Review: earliest valid date in ``Reviews Date`` when it holds
            more than one valid date, otherwise NaT.

    Every step can be re-run on an already processed frame without damaging
    it. The ``data`` parameter of the step methods is kept for backward
    compatibility only: the steps always operate on ``self.data``.
    """

    # Square brackets wrapping the raw "Cuisine Style" text.
    _BRACKETS = re.compile(r'[\[\]]')
    # Review dates are embedded in the "Reviews" text as MM/DD/YYYY.
    _REVIEW_DATE = re.compile(r'\d\d/\d\d/\d\d\d\d')
    _REVIEW_DATE_FORMAT = '%m/%d/%Y'

    def __init__(self, data):
        """Store ``data`` and apply all preprocessing steps to it in place."""
        self.data = data
        self.clear_cuis(self.data)
        self.edit_cuisine(self.data)
        self.cuisine_types(self.data)
        self.reviews_date(self.data)

    def clear_cuis(self, data):
        """Remove the square brackets from the ``Cuisine Style`` strings.

        ``data`` is ignored; the method works on ``self.data``.
        """
        def strip_brackets(value):
            # A plain ``re`` substitution is used instead of
            # ``Series.str.replace`` because the latter treats the pattern as
            # a literal string by default since pandas 2.0, which would leave
            # the brackets in place. Non-string values (NaN, or lists from a
            # previous run) are passed through unchanged.
            if isinstance(value, str):
                return self._BRACKETS.sub('', value)
            return value

        self.data['Cuisine Style'] = self.data['Cuisine Style'].apply(
            strip_brackets)

    def edit_cuisine(self, data):
        """Split each ``Cuisine Style`` string on commas into a list.

        Missing values become NaN; values that are already lists are kept
        as they are. ``data`` is ignored; the method works on ``self.data``.
        """
        def split_styles(value):
            # Check for a list first: pd.isnull() on a list returns an array,
            # which cannot be used as a condition.
            if isinstance(value, list):
                return value
            if pd.isnull(value):
                return np.nan
            return value.split(',')

        self.data['Cuisine Style'] = self.data['Cuisine Style'].apply(
            split_styles)

    def cuisine_types(self, data):
        """Add ``number_of_cuisine_types``: how many cuisines each row lists.

        Rows without a cuisine list (NaN) are counted as 1. ``data`` is
        ignored; the method works on ``self.data``.
        """
        def count_types(row):
            try:
                return len(row)
            except TypeError:
                # NaN (missing cuisine) has no length.
                return 1

        self.data['number_of_cuisine_types'] = self.data['Cuisine Style'].apply(
            count_types)

    @classmethod
    def _parse_dates(cls, dates):
        """Return the valid ``MM/DD/YYYY`` strings in ``dates`` as Timestamps.

        Anything that is not a non-empty list yields ``[]``. Strings that
        match the date pattern but are not real dates are dropped.
        """
        if not isinstance(dates, list) or not dates:
            return []
        parsed = pd.to_datetime(
            dates, format=cls._REVIEW_DATE_FORMAT, errors='coerce')
        return [stamp for stamp in parsed if not pd.isna(stamp)]

    def reviews_date(self, data):
        """Add ``Reviews Date``, ``Last Review`` and ``Second Review``.

        ``data`` is ignored; the method works on ``self.data``.
        """
        def catch_date(row):
            # Collect every date-like substring of the review text.
            if not isinstance(row, str):
                return np.nan
            return self._REVIEW_DATE.findall(row)

        def last_review(stamps):
            # Most recent review date.
            return max(stamps) if stamps else pd.NaT

        def second_review(stamps):
            # Oldest date, only meaningful when at least two dates exist.
            return min(stamps) if len(stamps) > 1 else pd.NaT

        self.data['Reviews Date'] = self.data['Reviews'].apply(catch_date)
        # Parse once and reuse the result for both derived columns.
        parsed = self.data['Reviews Date'].apply(self._parse_dates)
        self.data['Last Review'] = parsed.apply(last_review)
        self.data['Second Review'] = parsed.apply(second_review)

In [203]:
# Run all preprocessing steps. `df` itself is modified in place; the Preproc
# instance is not kept, so the cell only displays its repr.
Preproc(df)

<__main__.Preproc at 0x14ebfcac220>

In [188]:
# NOTE(review): the saved output of this cell comes from execution count 188,
# i.e. before the current Preproc definition (201) was run. Its 1970-01-01
# values do not match the 'Last Review' column shown in the full frame below.
# Re-run after Preproc(df) to refresh it.
df['Last Review']

0       1970-01-01
1       1970-01-01
2       1970-01-01
3       1970-01-01
4       1970-01-01
           ...    
39995   1970-01-01
39996   1970-01-01
39997   1970-01-01
39998   1970-01-01
39999   1970-01-01
Name: Last Review, Length: 40000, dtype: datetime64[ns]

In [204]:
# Inspect the frame after preprocessing; the new columns are
# number_of_cuisine_types, Reviews Date, Last Review and Second Review.
df

Unnamed: 0,Restaurant_id,City,Cuisine Style,Ranking,Rating,Price Range,Number of Reviews,Reviews,URL_TA,ID_TA,number_of_cuisine_types,Reviews Date,Last Review,Second Review
0,id_5569,Paris,"['European', 'French', 'International']",5570.0,3.5,$$ - $$$,194.0,"[['Good food at your doorstep', 'A good hotel ...",/Restaurant_Review-g187147-d1912643-Reviews-R_...,d1912643,3,"[12/31/2017, 11/20/2017]",2017-12-31,2017-11-20
1,id_1535,Stockholm,,1537.0,4.0,,10.0,"[['Unique cuisine', 'Delicious Nepalese food']...",/Restaurant_Review-g189852-d7992032-Reviews-Bu...,d7992032,1,"[07/06/2017, 06/19/2016]",2017-07-06,2016-06-19
2,id_352,London,"['Japanese', 'Sushi', 'Asian', 'Grill', 'V...",353.0,4.5,$$$$,688.0,"[['Catch up with friends', 'Not exceptional'],...",/Restaurant_Review-g186338-d8632781-Reviews-RO...,d8632781,7,"[01/08/2018, 01/06/2018]",2018-01-08,2018-01-06
3,id_3456,Berlin,,3458.0,5.0,,3.0,"[[], []]",/Restaurant_Review-g187323-d1358776-Reviews-Es...,d1358776,1,[],NaT,NaT
4,id_615,Munich,"['German', 'Central European', 'Vegetarian F...",621.0,4.0,$$ - $$$,84.0,"[['Best place to try a Bavarian food', 'Nice b...",/Restaurant_Review-g187309-d6864963-Reviews-Au...,d6864963,3,"[11/18/2017, 02/19/2017]",2017-11-18,2017-02-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39995,id_499,Milan,"['Italian', 'Vegetarian Friendly', 'Vegan Op...",500.0,4.5,$$ - $$$,79.0,"[['The real Italian experience!', 'Wonderful f...",/Restaurant_Review-g187849-d2104414-Reviews-Ro...,d2104414,4,"[12/16/2017, 11/12/2017]",2017-12-16,2017-11-12
39996,id_6340,Paris,"['French', 'American', 'Bar', 'European', ...",6341.0,3.5,$$ - $$$,542.0,"[['Parisian atmosphere', 'Bit pricey but inter...",/Restaurant_Review-g187147-d1800036-Reviews-La...,d1800036,5,"[12/21/2017, 12/12/2017]",2017-12-21,2017-12-12
39997,id_1649,Stockholm,"['Japanese', 'Sushi']",1652.0,4.5,,4.0,"[['Good by swedish standards', 'A hidden jewel...",/Restaurant_Review-g189852-d947615-Reviews-Sus...,d947615,2,"[11/03/2016, 04/12/2008]",2016-11-03,2008-04-12
39998,id_640,Warsaw,"['Polish', 'European', 'Eastern European', ...",641.0,4.0,$$ - $$$,70.0,"[['Underground restaurant', 'Oldest Restaurant...",/Restaurant_Review-g274856-d1100838-Reviews-Ho...,d1100838,5,"[07/11/2017, 06/18/2017]",2017-07-11,2017-06-18
