### General statistics ###

In [1]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
from scipy.stats import variation

from pandas.plotting import scatter_matrix
from string import punctuation


In [2]:
na_values = ['nan', 'N/A', 'NaN', 'NaT', '', 'Text', 'repost', 'r e p o s t -', 'Repost']
#na_values = ['nan', 'N/A', 'NaN', 'NaT', '0', '', 'Text', 'repost', 'r e p o s t -', 'Repost']
ira_data = pd.read_csv('../Data/data_IRA_Ads.csv', sep=";", parse_dates=['AD_CREATION_DATE', 'AD_END_DATE'], 
                       dayfirst=False, index_col='AD_ID', na_values=na_values)
ira_data = ira_data[ira_data['AD_IMPRESSIONS']>0]

In [3]:
ira_data.insert(5, 'AD_CLICKS_PER_VIEW', ira_data['AD_CLICKS']/ira_data['AD_IMPRESSIONS'])

In [4]:
epoch = datetime.datetime.utcfromtimestamp(0)

def unix_time_millis(dt):
    return (dt - epoch).total_seconds() * 1000.0

In [5]:
ira_data['AD_CREATION_WEEKDAY'] = ira_data['AD_CREATION_DATE'].dt.dayofweek
ira_data['AD_CREATION_YEAR'] = ira_data['AD_CREATION_DATE'].dt.year
ira_data['AD_CREATION_MONTH'] = ira_data['AD_CREATION_DATE'].dt.month
ira_data['AD_CREATION_DAY'] = ira_data['AD_CREATION_DATE'].dt.day
ira_data['AD_CREATION_TIME'] = ira_data['AD_CREATION_DATE'].dt.time
ira_data['AD_CREATION_EPOCH_TIME'] = ira_data['AD_CREATION_DATE'].apply(lambda x: unix_time_millis(x))

In [6]:
ira_data['AD_SPEND']=ira_data['AD_SPEND'].str.replace(' RUB','')

In [7]:
ira_data=ira_data.dropna(subset=['AD_TEXT']);
ira_data['AD_TEXT'].count()

2533

In [8]:
ira_data['AD_TEXT']=ira_data['AD_TEXT'].str.replace(r'https?:\/\/.*[\r\n]*', '')
ira_data['AD_TEXT']=ira_data['AD_TEXT'].str.replace(r'http?:\/\/.*[\r\n]*', '')

In [9]:
duplicateDF = pd.concat(g for _, g in ira_data.groupby("AD_TEXT") if len(g) > 1).copy()
duplicateDF.sort_values(by=['AD_TEXT'], inplace=True)

In [10]:
uniqueDF = ira_data.copy()
uniqueDF.drop_duplicates(subset ='AD_TEXT', 
                     keep = False, inplace = True)

In [11]:
ira_data['AD_TEXT'].count()

2533

In [12]:
duplicateDF['AD_TEXT'].count()

614

In [13]:
uniqueDF['AD_TEXT'].count()

1919

In [14]:
duplicateDF['AD_TEXT'].nunique()

203

In [15]:
duplicateDF['AD_TEXT'].value_counts().values

array([13, 12,  9,  8,  7,  7,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2],
      dtype=int64)

In [16]:
duplicateDF[(duplicateDF['AD_IMPRESSIONS']>0) & duplicateDF['AD_TEXT'].str.match('We speak for all fellow members of LGBT community across the nation.')][['AD_CLICKS', 'AD_IMPRESSIONS', 'AD_CLICKS_PER_VIEW', 'AD_CREATION_DATE', 'AD_CREATION_WEEKDAY','AD_CREATION_YEAR', 'AD_CREATION_MONTH','AD_CREATION_DAY','AD_CREATION_TIME','AD_TEXT', 'AD_SPEND', 'AD_POTENTIAL_TARGET']].head(20)

Unnamed: 0_level_0,AD_CLICKS,AD_IMPRESSIONS,AD_CLICKS_PER_VIEW,AD_CREATION_DATE,AD_CREATION_WEEKDAY,AD_CREATION_YEAR,AD_CREATION_MONTH,AD_CREATION_DAY,AD_CREATION_TIME,AD_TEXT,AD_SPEND,AD_POTENTIAL_TARGET
AD_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
669,91,1997,0.045568,2015-06-17 03:15:18,2,2015,6,17,03:15:18,We speak for all fellow members of LGBT commun...,2673.94,144000000.0
591,5441,93925,0.057929,2016-03-23 05:36:11,2,2016,3,23,05:36:11,We speak for all fellow members of LGBT commun...,31713.12,
590,6566,67221,0.097678,2015-11-05 23:11:58,3,2015,11,5,23:11:58,We speak for all fellow members of LGBT commun...,46994.76,36000000.0
588,21449,175460,0.122244,2015-07-14 00:01:38,1,2015,7,14,00:01:38,We speak for all fellow members of LGBT commun...,93754.38,41000000.0
587,542,15175,0.035717,2015-07-09 01:50:28,3,2015,7,9,01:50:28,We speak for all fellow members of LGBT commun...,6201.7,35000000.0
586,59,1811,0.032579,2015-07-07 01:51:07,1,2015,7,7,01:51:07,We speak for all fellow members of LGBT commun...,757.65,166000000.0
592,7289,190076,0.038348,2016-08-04 04:20:05,3,2016,8,4,04:20:05,We speak for all fellow members of LGBT commun...,36000.0,
584,3206,31618,0.101398,2015-06-23 05:06:57,1,2015,6,23,05:06:57,We speak for all fellow members of LGBT commun...,16630.2,143000000.0
583,186,4265,0.043611,2015-06-22 08:19:07,0,2015,6,22,08:19:07,We speak for all fellow members of LGBT commun...,1393.55,139000000.0
582,1724,18891,0.09126,2015-06-18 00:58:48,3,2015,6,18,00:58:48,We speak for all fellow members of LGBT commun...,10085.77,35000000.0


### Common Statistics ###

**TODO** Mean, median etc. for success --> Chart(?)


In [17]:
ira_data.describe()

Unnamed: 0,AD_CLICKS,AD_IMPRESSIONS,AD_CLICKS_PER_VIEW,AD_POTENTIAL_TARGET,AD_CREATION_WEEKDAY,AD_CREATION_YEAR,AD_CREATION_MONTH,AD_CREATION_DAY,AD_CREATION_EPOCH_TIME
count,2533.0,2533.0,2533.0,1727.0,2533.0,2533.0,2533.0,2533.0,2533.0
mean,1412.772996,15619.95,0.101251,30069230.0,2.362811,2016.159889,5.980261,14.949862,1470950000000.0
std,3845.085612,52755.44,0.078493,41249470.0,1.324155,0.679258,3.16284,7.368988,18401620000.0
min,0.0,1.0,0.0,1900.0,0.0,2015.0,1.0,1.0,1433822000000.0
25%,38.0,601.0,0.045496,3800000.0,1.0,2016.0,4.0,10.0,1460339000000.0
50%,236.0,3359.0,0.10099,24000000.0,2.0,2016.0,5.0,14.0,1472430000000.0
75%,1390.0,12653.0,0.143902,37000000.0,3.0,2017.0,8.0,21.0,1487746000000.0
max,73063.0,1334544.0,1.690789,253000000.0,6.0,2017.0,12.0,31.0,1502664000000.0


In [18]:
test = duplicateDF.groupby(by=['AD_TEXT'])['AD_CLICKS'].describe()
test
#variation(duplicateDF[duplicateDF['AD_TEXT'].str.match('We speak for all fellow members of LGBT community across the nation.')]['AD_CLICKS_PER_VIEW'])
#variation(duplicateDF[duplicateDF['AD_TEXT'].str.match('We speak for all fellow members of LGBT community across the nation.')]['AD_CLICKS_PER_VIEW'])
#testing = duplicateDF.groupby(by=['AD_TEXT'])
#testing_mean = testing['AD_CLICKS_PER_VIEW'].std()
#testing_mean.sort_values()

#testing_std = variation(testing['AD_CLICKS_PER_VIEW'])
#testing_count = testing.count()
#testing_std.plot()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
AD_TEXT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"A new video shows Officer Jason Stockley planting a gun on Anthony Lamar Smith after he shot and killed him. A new revelation has popped up following the release of a new video of the 2011 police fatal killing in St. Louis. The video captures Officer Jason Stockley, th St. Louis Killer Cop Caught In On Camera I BM BM",2.0,133.500000,188.797511,0.0,66.75,133.5,200.25,267.0
A page to unit all Muslim people living in the USA:,3.0,95.333333,92.813433,33.0,42.00,51.0,126.50,202.0
A rally in loving memory of Aiyana Jones May 16 will mark the sixth anniversary of the br The 6th Anniversary Of The Aiyana Jones' Death,5.0,821.200000,1006.310688,0.0,314.00,442.0,804.00,2546.0
"A tiny browser extension will help you to browse, listen and share any kind of music! Free online music library! musicfb.info FaceMusic Stop A.I.",3.0,10.666667,15.143756,0.0,2.00,4.0,16.00,28.0
"A young black guy who was mentally disabled decided to end his life, but will only do that by making the cops kill him. This is absolutely insane in the highest order. He took a gun and ordered his other two brothers to take a video of the whole incident. Michael Ford, at the police vicinity opened fire and got the cop's attention. The filming was still in process as he commanded. The first cops who appeared fired back and almost immediately ended up drawing fire. He gave the other officer a chance to get into position when he opened fire. It's quite not understandable why his own cop shot him and shot the mentality ill man too. This is a good example of sick reality too. Is there any explanation why cops have started shooting each other? Follow my Facebook:",2.0,446.500000,12.020815,438.0,442.25,446.5,450.75,455.0
"ATTENTION, NEWYORK! We need your support! We'll provide you with free food and coffee. Come and bring your friends! Join the FB event for more details:",3.0,212.000000,186.903719,0.0,141.50,283.0,318.00,353.0
African American Civil Rights Movement!,2.0,7765.000000,7841.814203,2220.0,4992.50,7765.0,10537.50,13310.0
"Alek Wek is now a hero for many of us. Supermodel decided not to go with the crowd that set the European standards of beauty. Despite the fact that all of the fashion models were obliged to wear blond wigs, she did something that was against the norm- she took her wig off and threw it in the crowd. #YouGoGirl",2.0,1954.500000,2600.031634,116.0,1035.25,1954.5,2873.75,3793.0
"America has always been hinged on hard-working people. If you remove jobs, you'll remove our country from the world map. The state of Pennsylvania rose owing to multiple enterprises mining coal, producing steel, and creating the need for other jobs, groceries, doctors, dentists, insurance, gas, vehicles, mechanics and the list goes on. As far as Mr. Trump pursues the goal of creating more jobs and supports the working class. He said he would put miners back to work. We could help Mr. Trump win Pennsylvania which is a battleground state. We'd like to organize a rally ""Miners for Trump"" in Pennsylvania. Have something against coal industries? Please note then that burning coal is not more harmful than lumber. Alternative energy is only possible when subsidized by government for it is not lucrative. You cannot leave tens of thousands of people without a job just because of lobbyists' interests. The current list of locations is being elaborated. Suggested cities are Erie, Pittsburg, Scranton, Harrisburg, Allentown, and Philly. Confirmed locations: Marconi Plaza, Philadelphia. Miners for Trump: Unity day in Pennsylvania",3.0,178.000000,244.669164,0.0,38.50,77.0,267.00,457.0
"August 9, 2016, marks a two-year anniversary of the brutal murder of Michael Brown. an 18- Rally In Memory Of Mike Brown",2.0,1505.500000,655.487986,1042.0,1273.75,1505.5,1737.25,1969.0


In [19]:
compair_AD_CLICKS = [[ira_data['AD_CLICKS'].count(),duplicateDF['AD_CLICKS'].count(),uniqueDF['AD_CLICKS'].count()],[ira_data['AD_CLICKS'].min(),duplicateDF['AD_CLICKS'].min(),uniqueDF['AD_CLICKS'].min()],[ira_data['AD_CLICKS'].max(),duplicateDF['AD_CLICKS'].max(),uniqueDF['AD_CLICKS'].max()],[ira_data['AD_CLICKS'].median(),duplicateDF['AD_CLICKS'].median(),uniqueDF['AD_CLICKS'].median()],[ira_data['AD_CLICKS'].mean(),duplicateDF['AD_CLICKS'].mean(),uniqueDF['AD_CLICKS'].mean()],[ira_data['AD_CLICKS'].std(),duplicateDF['AD_CLICKS'].std(),uniqueDF['AD_CLICKS'].std()],[ira_data['AD_CLICKS'].var(),duplicateDF['AD_CLICKS'].var(),uniqueDF['AD_CLICKS'].var()],[variation(ira_data['AD_CLICKS']),variation(duplicateDF['AD_CLICKS']),variation(uniqueDF['AD_CLICKS'])]]
pd.DataFrame(compair_AD_CLICKS,columns=["All", "Duplicates","Unique"], index=["Count","Minimum","Maximum","Median","Mean","Standard deviation","Variance", "Coefficient of variation"]).style.format("{:.10g}")

Unnamed: 0,All,Duplicates,Unique
Count,2533.0,614.0,1919.0
Minimum,0.0,0.0,0.0
Maximum,73063.0,72043.0,73063.0
Median,236.0,75.0,317.0
Mean,1412.772996,1564.07329,1364.36321
Standard deviation,3845.085612,5341.859209,3223.029478
Variance,14784683.37,28535459.8,10387919.02
Coefficient of variation,2.721121191,3.412568594,2.361680216


In [21]:
compair_AD_IMPRESSIONS = [[ira_data['AD_IMPRESSIONS'].count(),duplicateDF['AD_IMPRESSIONS'].count(),uniqueDF['AD_IMPRESSIONS'].count()],[ira_data['AD_IMPRESSIONS'].min(),duplicateDF['AD_IMPRESSIONS'].min(),uniqueDF['AD_IMPRESSIONS'].min()],[ira_data['AD_IMPRESSIONS'].max(),duplicateDF['AD_IMPRESSIONS'].max(),uniqueDF['AD_IMPRESSIONS'].max()],[ira_data['AD_IMPRESSIONS'].median(),duplicateDF['AD_IMPRESSIONS'].median(),uniqueDF['AD_IMPRESSIONS'].median()],[ira_data['AD_IMPRESSIONS'].mean(),duplicateDF['AD_IMPRESSIONS'].mean(),uniqueDF['AD_IMPRESSIONS'].mean()],[ira_data['AD_IMPRESSIONS'].std(),duplicateDF['AD_IMPRESSIONS'].std(),uniqueDF['AD_IMPRESSIONS'].std()],[ira_data['AD_IMPRESSIONS'].var(),duplicateDF['AD_IMPRESSIONS'].var(),uniqueDF['AD_IMPRESSIONS'].var()],[variation(ira_data['AD_IMPRESSIONS']),variation(duplicateDF['AD_IMPRESSIONS']),variation(uniqueDF['AD_IMPRESSIONS'])]]
pd.DataFrame(compair_AD_IMPRESSIONS,columns=["All", "Duplicates","Unique"], index=["Count","Minimum","Maximum","Median","Mean","Standard deviation","Variance", "Coefficient of variation"]).style.format("{:.10g}")

Unnamed: 0,All,Duplicates,Unique
Count,2533.0,614.0,1919.0
Minimum,1.0,1.0,1.0
Maximum,1334544.0,968768.0,1334544.0
Median,3359.0,1516.0,3970.0
Mean,15619.95026,22629.7443,13377.10839
Standard deviation,52755.43891,75019.64608,43065.2077
Variance,2783136335.0,5627947298.0,1854612115.0
Coefficient of variation,3.37677287,3.312389627,3.218482219


In [22]:
compair_AD_CLICKS_PER_VIEW = [[ira_data['AD_CLICKS_PER_VIEW'].count(),duplicateDF['AD_CLICKS_PER_VIEW'].count(),uniqueDF['AD_CLICKS_PER_VIEW'].count()],[ira_data['AD_CLICKS_PER_VIEW'].min(),duplicateDF['AD_CLICKS_PER_VIEW'].min(),uniqueDF['AD_CLICKS_PER_VIEW'].min()],[ira_data['AD_CLICKS_PER_VIEW'].max(),duplicateDF['AD_CLICKS_PER_VIEW'].max(),uniqueDF['AD_CLICKS_PER_VIEW'].max()],[ira_data['AD_CLICKS_PER_VIEW'].median(),duplicateDF['AD_CLICKS_PER_VIEW'].median(),uniqueDF['AD_CLICKS_PER_VIEW'].median()],[ira_data['AD_CLICKS_PER_VIEW'].mean(),duplicateDF['AD_CLICKS_PER_VIEW'].mean(),uniqueDF['AD_CLICKS_PER_VIEW'].mean()],[ira_data['AD_CLICKS_PER_VIEW'].std(),duplicateDF['AD_CLICKS_PER_VIEW'].std(),uniqueDF['AD_CLICKS_PER_VIEW'].std()],[ira_data['AD_CLICKS_PER_VIEW'].var(),duplicateDF['AD_CLICKS_PER_VIEW'].var(),uniqueDF['AD_CLICKS_PER_VIEW'].var()],[variation(ira_data['AD_CLICKS_PER_VIEW']),variation(duplicateDF['AD_CLICKS_PER_VIEW']),variation(uniqueDF['AD_CLICKS_PER_VIEW'])]]
pd.DataFrame(compair_AD_CLICKS_PER_VIEW,columns=["All", "Duplicates","Unique"], index=["Count","Minimum","Maximum","Median","Mean","Standard deviation","Variance", "Coefficient of variation"]).style.format("{:.10g}")

Unnamed: 0,All,Duplicates,Unique
Count,2533.0,614.0,1919.0
Minimum,0.0,0.0,0.0
Maximum,1.690789474,0.8442622951,1.690789474
Median,0.100990099,0.04553224247,0.1176097973
Mean,0.1012513135,0.05675619844,0.1154878954
Standard deviation,0.07849297322,0.05929284855,0.07857033715
Variance,0.006161146844,0.003515641889,0.00617329788
Coefficient of variation,0.7750761438,1.043842724,0.6801566739


In [23]:
compair_AD_POTENTIAL_TARGET = [[ira_data['AD_POTENTIAL_TARGET'].count(),duplicateDF['AD_POTENTIAL_TARGET'].count(),uniqueDF['AD_POTENTIAL_TARGET'].count()],[ira_data['AD_POTENTIAL_TARGET'].min(),duplicateDF['AD_POTENTIAL_TARGET'].min(),uniqueDF['AD_POTENTIAL_TARGET'].min()],[ira_data['AD_POTENTIAL_TARGET'].max(),duplicateDF['AD_POTENTIAL_TARGET'].max(),uniqueDF['AD_POTENTIAL_TARGET'].max()],[ira_data['AD_POTENTIAL_TARGET'].median(),duplicateDF['AD_POTENTIAL_TARGET'].median(),uniqueDF['AD_POTENTIAL_TARGET'].median()],[ira_data['AD_POTENTIAL_TARGET'].mean(),duplicateDF['AD_POTENTIAL_TARGET'].mean(),uniqueDF['AD_POTENTIAL_TARGET'].mean()],[ira_data['AD_POTENTIAL_TARGET'].std(),duplicateDF['AD_POTENTIAL_TARGET'].std(),uniqueDF['AD_POTENTIAL_TARGET'].std()],[ira_data['AD_POTENTIAL_TARGET'].var(),duplicateDF['AD_POTENTIAL_TARGET'].var(),uniqueDF['AD_POTENTIAL_TARGET'].var()],[variation(ira_data['AD_POTENTIAL_TARGET']),variation(duplicateDF['AD_POTENTIAL_TARGET']),variation(uniqueDF['AD_POTENTIAL_TARGET'])]]
pd.DataFrame(compair_AD_POTENTIAL_TARGET,columns=["All", "Duplicates","Unique"], index=["Count","Minimum","Maximum","Median","Mean","Standard deviation","Variance", "Coefficient of variation"]).style.format("{:.10g}")

Unnamed: 0,All,Duplicates,Unique
Count,1727.0,389.0,1338.0
Minimum,1900.0,29000.0,1900.0
Maximum,253000000.0,252000000.0,253000000.0
Median,24000000.0,17000000.0,24000000.0
Mean,30069233.87,40914964.01,26916028.33
Standard deviation,41249471.44,62942651.26,31665211.16
Variance,1701518894000000.0,3961777348000000.0,1002685598000000.0
Coefficient of variation,,,


In [52]:
uniqueDF.groupby(['AD_CREATION_WEEKDAY']).count()


Unnamed: 0_level_0,AD_CLICKS,AD_CREATION_DATE,AD_END_DATE,AD_IMPRESSIONS,AD_LANDING_PAGE,AD_CLICKS_PER_VIEW,AD_PATH,AD_SPEND,AD_TARGETING_AGE,AD_TARGETING_CUSTOM_AUDIENCE,...,AD_TARGETING_LOCATION_RECENTLY_IN,AD_TARGETING_PEOPLE_WHO_MATCH,AD_TARGETING_PLACEMENTS,AD_TEXT,AD_POTENTIAL_TARGET,AD_CREATION_YEAR,AD_CREATION_MONTH,AD_CREATION_DAY,AD_CREATION_TIME,AD_CREATION_EPOCH_TIME
AD_CREATION_WEEKDAY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,178,178,163,178,177,178,178,178,178,1,...,0,36,178,178,129,178,178,178,178,178
1,403,403,334,403,401,403,403,403,403,3,...,0,98,403,403,256,403,403,403,403,403
2,367,367,321,367,360,367,367,366,367,5,...,0,80,367,367,248,367,367,367,367,367
3,458,458,393,458,458,458,458,457,458,1,...,0,117,458,458,302,458,458,458,458,458
4,490,490,436,490,489,490,490,490,490,4,...,0,81,490,490,384,490,490,490,490,490
5,10,10,10,10,10,10,10,10,10,0,...,0,0,10,10,8,10,10,10,10,10
6,13,13,12,13,13,13,13,13,13,0,...,0,1,13,13,11,13,13,13,13,13


In [45]:
ira_data['AD_CREATION_YEAR'].astype(object).describe()

count     2533
unique       3
top       2016
freq      1300
Name: AD_CREATION_YEAR, dtype: int64

In [None]:
ira_data['AD_CREATION_MONTH'].astype(object).describe()