### 1. Import Pandas

In [192]:
import pandas as pd

### 2. Print Version of Pandas

In [193]:
pd.__version__

'2.2.2'

### 3. Print out all the version information of the libraries that are required by the pandas library.

In [194]:
pd.show_versions()


INSTALLED VERSIONS
------------------
commit                : d9cdd2ee5a58015ef6f4d15c7226110c9aab8140
python                : 3.12.3.final.0
python-bits           : 64
OS                    : Windows
OS-release            : 10
Version               : 10.0.21996
machine               : AMD64
processor             : Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder             : little
LC_ALL                : None
LANG                  : None
LOCALE                : English_United States.1252

pandas                : 2.2.2
numpy                 : 1.26.4
pytz                  : 2024.1
dateutil              : 2.8.2
setuptools            : None
pip                   : 24.0
Cython                : None
pytest                : None
hypothesis            : None
sphinx                : None
blosc                 : None
feather               : None
xlsxwriter            : None
lxml.etree            : None
html5lib              : None
pymysql               : None
psycopg2            

## DataFrame basics
### A few of the fundamental routines for selecting, sorting, adding and aggregating data in DataFrames
##### Difficulty: easy

### 4. Create a DataFrame df from this dictionary data which has the index labels.

In [195]:
import numpy as np 

data = {'animal': ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
        'age': [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
        'visits': [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
        'priority': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

In [196]:
df = pd.DataFrame(data, index=labels)
df

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


### 5. Display a summary of the basic information about this DataFrame and its data (hint: there is a single method that can be called on the DataFrame).

In [197]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   animal    10 non-null     object 
 1   age       8 non-null      float64
 2   visits    10 non-null     int64  
 3   priority  10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 400.0+ bytes


In [198]:
df.describe()

Unnamed: 0,age,visits
count,8.0,10.0
mean,3.4375,1.9
std,2.007797,0.875595
min,0.5,1.0
25%,2.375,1.0
50%,3.0,2.0
75%,4.625,2.75
max,7.0,3.0


### 6. Return the first 3 rows of the DataFrame df.

In [199]:
df.head(3)

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no


In [200]:
df.iloc[:3]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no


In [201]:
df.tail(3)

Unnamed: 0,animal,age,visits,priority
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


### 7. Select just the 'animal' and 'age' columns from the DataFrame df.

In [202]:
df[["animal","age"]]

Unnamed: 0,animal,age
a,cat,2.5
b,cat,3.0
c,snake,0.5
d,dog,
e,dog,5.0
f,cat,2.0
g,snake,4.5
h,cat,
i,dog,7.0
j,dog,3.0


### 8. Select the data in rows [3, 4, 8] and in columns ['animal', 'age'].

In [203]:
df[["animal","age"]].iloc[[3,4,8]]

Unnamed: 0,animal,age
d,dog,
e,dog,5.0
i,dog,7.0


### 9. Select only the rows where the number of visits is greater than 3.

In [204]:
df.head()

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no


In [205]:
df[df["visits"] > 2]

Unnamed: 0,animal,age,visits,priority
b,cat,3.0,3,yes
d,dog,,3,yes
f,cat,2.0,3,no


### 10. Select the rows where the age is missing, i.e. it is NaN.

In [206]:
df.isna()

Unnamed: 0,animal,age,visits,priority
a,False,False,False,False
b,False,False,False,False
c,False,False,False,False
d,False,True,False,False
e,False,False,False,False
f,False,False,False,False
g,False,False,False,False
h,False,True,False,False
i,False,False,False,False
j,False,False,False,False


In [207]:
df[df["age"].isna()]

Unnamed: 0,animal,age,visits,priority
d,dog,,3,yes
h,cat,,1,yes


### 11. Select the rows where the animal is a cat and the age is less than 3.

In [208]:
df[(df.animal=="cat")&(df.age < 3)]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
f,cat,2.0,3,no


### 12. Select the rows where age is between 2 and 4 (inclusive).

In [209]:
df

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


In [210]:
df[(df.age>=2)&(df.age <= 4)]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
f,cat,2.0,3,no
j,dog,3.0,1,no


### 13. Change the age in row 'f' to 1.5.

In [211]:
df.loc['f', 'age'] = 1.5
df

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,1.5,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


### 14. Calculate the sum of all visits in df (i.e. find the total number of visits).

In [212]:
df.sum()['visits']

19

### 15. Calculate the mean age for each different animal in df.

In [213]:
df.age.mean()

3.375

In [214]:
g= df.groupby('animal')

In [215]:
g.get_group("cat") 

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
f,cat,1.5,3,no
h,cat,,1,yes


In [216]:
for animal,df in g :
    print(df)
    print() ## new line

  animal  age  visits priority
a    cat  2.5       1      yes
b    cat  3.0       3      yes
f    cat  1.5       3       no
h    cat  NaN       1      yes

  animal  age  visits priority
d    dog  NaN       3      yes
e    dog  5.0       2       no
i    dog  7.0       2       no
j    dog  3.0       1       no

  animal  age  visits priority
c  snake  0.5       2       no
g  snake  4.5       1       no



In [217]:
df.groupby('animal')['age'].agg('mean')

animal
snake    2.5
Name: age, dtype: float64

In [218]:
df.groupby('animal')['age'].mean()

animal
snake    2.5
Name: age, dtype: float64

### 16. Append a new row 'k' to df with your choice of values for each column. Then delete that row to return the original DataFrame.

In [219]:
df.loc['k'] = ['monkey', 2, 3, 'yes']
df.drop(index=['k'], inplace=True)
df

Unnamed: 0,animal,age,visits,priority
c,snake,0.5,2,no
g,snake,4.5,1,no


^^ 
Using inplace=True in the drop() method ensures that the operation modifies the original DataFrame df in place, without returning a new DataFrame. If inplace=True is not used, the drop() method will return a new DataFrame with the specified rows removed, but the original DataFrame df will remain unchanged.

### 17. Count the number of each type of animal in df.

In [220]:
animal_counts = df['animal'].value_counts()
print(animal_counts)


animal
snake    2
Name: count, dtype: int64


### 18. Sort df first by the values in the 'age' in decending order, then by the value in the 'visits' column in ascending order (so row i should be first, and row d should be last).

In [221]:
df

Unnamed: 0,animal,age,visits,priority
c,snake,0.5,2,no
g,snake,4.5,1,no


In [222]:
df.sort_values(by="visits",ascending=False)

Unnamed: 0,animal,age,visits,priority
c,snake,0.5,2,no
g,snake,4.5,1,no


In [223]:
df.sort_values(by=["visits","age"])

Unnamed: 0,animal,age,visits,priority
g,snake,4.5,1,no
c,snake,0.5,2,no


In [224]:
df.sort_values(by=['age', 'visits'], ascending=[False, True])

Unnamed: 0,animal,age,visits,priority
g,snake,4.5,1,no
c,snake,0.5,2,no


### 19. The 'priority' column contains the values 'yes' and 'no'. Replace this column with a column of boolean values: 'yes' should be True and 'no' should be False.

In [225]:
df.replace({
    "no":"False",
    "yes" : "True"
})

Unnamed: 0,animal,age,visits,priority
c,snake,0.5,2,False
g,snake,4.5,1,False


In [226]:
df.replace(["yes","no"],["True","False"])

Unnamed: 0,animal,age,visits,priority
c,snake,0.5,2,False
g,snake,4.5,1,False


### 20. In the 'animal' column, change the 'snake' entries to 'python'.

In [227]:
df.replace(["snake"],["Python"])

Unnamed: 0,animal,age,visits,priority
c,Python,0.5,2,no
g,Python,4.5,1,no


In [228]:
df['animal'].replace({'snake': 'python'}, inplace=True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['animal'].replace({'snake': 'python'}, inplace=True)


Unnamed: 0,animal,age,visits,priority
c,python,0.5,2,no
g,python,4.5,1,no


### 21. For each animal type and each number of visits, find the mean age. In other words, each row is an animal, each column is a number of visits and the values are the mean ages (hint: use a pivot table).

In [229]:
for animal,df in g :
    print(df)
    print() ## new line

  animal  age  visits priority
a    cat  2.5       1      yes
b    cat  3.0       3      yes
f    cat  1.5       3       no
h    cat  NaN       1      yes

  animal  age  visits priority
d    dog  NaN       3      yes
e    dog  5.0       2       no
i    dog  7.0       2       no
j    dog  3.0       1       no

  animal  age  visits priority
c  snake  0.5       2       no
g  snake  4.5       1       no



In [230]:
pivot_table = df.pivot_table(index='animal', columns='visits', values='age', aggfunc='mean')
pivot_table

visits,1,2
animal,Unnamed: 1_level_1,Unnamed: 2_level_1
snake,4.5,0.5


## DataFrames: beyond the basics

##### Difficulty: medium


### 22. You have a DataFrame df with a column 'A' of integers. 

#### For example:
df = pd.DataFrame({'A': [1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7]})

How do you filter out rows which contain the same integer as the row immediately above?

You should be left with a column containing the following values:

1, 2, 3, 4, 5, 6, 7

In [231]:
df = pd.DataFrame({'A': [1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7]})
df

Unnamed: 0,A
0,1
1,2
2,2
3,3
4,4
5,5
6,5
7,5
8,6
9,7


In [232]:
df_new = df["A"].unique()
pd.DataFrame(df_new)

##WORNG APPROACH!!


Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5
5,6
6,7


In [233]:
df

Unnamed: 0,A
0,1
1,2
2,2
3,3
4,4
5,5
6,5
7,5
8,6
9,7


In [234]:
df['A'].shift()

0     NaN
1     1.0
2     2.0
3     2.0
4     3.0
5     4.0
6     5.0
7     5.0
8     5.0
9     6.0
10    7.0
Name: A, dtype: float64

In [235]:
df['A'] != df['A'].shift()

0      True
1      True
2     False
3      True
4      True
5      True
6     False
7     False
8      True
9      True
10    False
Name: A, dtype: bool

In [236]:
filtered_df = df[df['A'] != df['A'].shift()] ## ONLY TRUE VALUES WILL BE PRINTED 

filtered_df

Unnamed: 0,A
0,1
1,2
3,3
4,4
5,5
8,6
9,7


### 23. Given a DataFrame of numeric values, say

df = pd.DataFrame(np.random.random(size=(5, 3))) # a 5x3 frame of float values

how do you subtract the row mean from each element in the row?

In [237]:
df = pd.DataFrame(np.ones((5, 3))) # a 5x3 frame of float values
df

Unnamed: 0,0,1,2
0,1.0,1.0,1.0
1,1.0,1.0,1.0
2,1.0,1.0,1.0
3,1.0,1.0,1.0
4,1.0,1.0,1.0


In [238]:
df.mean(axis=1)

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
dtype: float64

In [239]:
df.sub(df.mean(axis=1), axis=0)

Unnamed: 0,0,1,2
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0


In [240]:
df = df - df.mean()
df

Unnamed: 0,0,1,2
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0


### 24. Suppose you have DataFrame with 10 columns of real numbers, for example:

df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))

Which column of numbers has the smallest sum? Return that column's label.

In [241]:
df = pd.DataFrame(np.random.random(size=(5, 10)), columns=list('abcdefghij'))
df

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,0.219389,0.507293,0.704069,0.939277,0.336941,0.670582,0.715004,0.422001,0.761266,0.358284
1,0.387159,0.475561,0.943587,0.490287,0.174849,0.032382,0.000232,0.877953,0.76789,0.803871
2,0.59829,0.88689,0.800119,0.043483,0.780307,0.889189,0.315639,0.400822,0.939286,0.14362
3,0.42042,0.776845,0.336779,0.409763,0.066029,0.27018,0.388379,0.484329,0.118368,0.354196
4,0.181661,0.086992,0.601609,0.209433,0.829251,0.804659,0.197103,0.947957,0.790346,0.508418


In [242]:
df.sum()

a    1.806918
b    2.733581
c    3.386163
d    2.092244
e    2.187377
f    2.666992
g    1.616358
h    3.133061
i    3.377156
j    2.168388
dtype: float64

In [243]:
df.sum().min()


1.6163579114058155

In [244]:
df.sum().idxmin()


'g'

In [245]:
df.sum().idxmax()

'c'

### 25. How do you count how many unique rows a DataFrame has (i.e. ignore all rows that are duplicates)? 
As input, use a DataFrame of zeros and ones with 10 rows and 3 columns.

In [246]:
np.random.randint (0,1)

0

In [247]:
np.random.randint(0, 2, size=(10, 3))

array([[0, 0, 1],
       [1, 1, 0],
       [1, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [1, 1, 1],
       [1, 0, 1],
       [0, 0, 0],
       [0, 0, 1],
       [1, 0, 0]])

In [248]:
df = pd.DataFrame(np.random.randint(0, 2, size=(5, 3)))
df

Unnamed: 0,0,1,2
0,1,0,1
1,0,1,1
2,1,0,1
3,1,1,0
4,1,0,1


In [249]:
df = df.drop_duplicates() ## drops duplicate rows
df

Unnamed: 0,0,1,2
0,1,0,1
1,0,1,1
3,1,1,0


### 26. In the cell below, you have a DataFrame df that consists of 10 columns of floating-point numbers. Exactly 5 entries in each row are NaN values.

For each row of the DataFrame, find the column which contains the third NaN value.

You should return a Series of column labels: e, c, d, h, d

In [250]:
nan=np.nan
data = [[0.04,  nan,  nan, 0.25,  nan, 0.43, 0.71, 0.51,  nan,  nan],
        [ nan,  nan,  nan, 0.04, 0.76,  nan,  nan, 0.67, 0.76, 0.16],
        [ nan,  nan, 0.5 ,  nan, 0.31, 0.4 ,  nan,  nan, 0.24, 0.01],
        [0.49,  nan,  nan, 0.62, 0.73, 0.26, 0.85,  nan,  nan,  nan],
        [ nan,  nan, 0.41,  nan, 0.05,  nan, 0.61,  nan, 0.48, 0.68]]

col= ["a","b","c","d","e","f","g","h","i","j"]

p=pd.DataFrame(data,columns=col)
p

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,0.04,,,0.25,,0.43,0.71,0.51,,
1,,,,0.04,0.76,,,0.67,0.76,0.16
2,,,0.5,,0.31,0.4,,,0.24,0.01
3,0.49,,,0.62,0.73,0.26,0.85,,,
4,,,0.41,,0.05,,0.61,,0.48,0.68


In [251]:
p.isna()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,False,True,True,False,True,False,False,False,True,True
1,True,True,True,False,False,True,True,False,False,False
2,True,True,False,True,False,False,True,True,False,False
3,False,True,True,False,False,False,False,True,True,True
4,True,True,False,True,False,True,False,True,False,False


In [252]:
p[p.isna()]


Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [253]:
def third_nan_col(row):
    nan_cols = row.index[row.isna()]
    return nan_cols[2] if len(nan_cols) >= 3 else None

result_series = p.apply(third_nan_col, axis=1)

print(result_series)

0    e
1    c
2    d
3    h
4    d
dtype: object


# PROBLEM ^^

### 27. A DataFrame has a column of groups 'grps' and and column of integer values 'vals':

df = pd.DataFrame({'grps': list('aaabbcaabcccbbc'), 

                   'vals': [12,345,3,1,45,14,4,52,54,23,235,21,57,3,87]})

For each group, find the sum of the three greatest values. You should end up with the answer as follows:


grps

a               409

b                156

c               345

In [254]:
df = pd.DataFrame({'grps': list('aaabbcaabcccbbc'), 
                   'vals': [12,345,3,1,45,14,4,52,54,23,235,21,57,3,87]})

df

Unnamed: 0,grps,vals
0,a,12
1,a,345
2,a,3
3,b,1
4,b,45
5,c,14
6,a,4
7,a,52
8,b,54
9,c,23


In [255]:
g= df.groupby("grps")

In [256]:
for grps,df in g:
    print(df)
    print()

  grps  vals
0    a    12
1    a   345
2    a     3
6    a     4
7    a    52

   grps  vals
3     b     1
4     b    45
8     b    54
12    b    57
13    b     3

   grps  vals
5     c    14
9     c    23
10    c   235
11    c    21
14    c    87



In [257]:
group_sums = df.groupby('grps')['vals'].sum() ### TOTAL SUM
print(group_sums)

grps
c    380
Name: vals, dtype: int64


In [258]:
def sum_of_three_greatest(group):
    sorted_vals = group.sort_values(ascending=False) ###
    return sorted_vals.iloc[:3].sum()


In [259]:
df.groupby('grps')['vals'].apply(sum_of_three_greatest).reset_index()

Unnamed: 0,grps,vals
0,c,345


### 28. The DataFrame df constructed below has two integer columns 'A' and 'B'. The values in 'A' are between 1 and 100 (inclusive).


For each group of 10 consecutive integers in 'A' (i.e. (0, 10], (10, 20], ...), calculate the sum of the corresponding values in column 'B'.

#### || SKIPPED 28 ###

## DataFrames: harder problems 

### 29. Consider a DataFrame df where there is an integer column 'X':


df = pd.DataFrame({'X': [7, 2, 0, 3, 4, 2, 5, 0, 3, 4]})
For each value, count the difference back to the previous zero (or the start of the Series, whichever is closer). These values should therefore be


[1, 2, 0, 1, 2, 3, 4, 0, 1, 2]
Make this a new column 'Y'.

### 30. Consider the DataFrame constructed below which contains rows and columns of numerical data.


Create a list of the column-row index locations of the 3 largest values in this DataFrame. In this case, the answer should be:


[(5, 7), (6, 4), (2, 5)]

### 31. You are given the DataFrame below with a column of group IDs, 'grps', and a column of corresponding integer values, 'vals'.


df = pd.DataFrame({"vals": np.random.RandomState(31).randint(-30, 30, size=15), 
                   "grps": np.random.RandomState(31).choice(["A", "B"], 15)})
                   
Create a new column 'patched_values' which contains the same values as the 'vals' any negative values in 'vals' with the group mean:

### 32. Implement a rolling mean over groups with window size 3, which ignores NaN value. For example consider the following DataFrame:


>>> df = pd.DataFrame({'group': list('aabbabbbabab'),
                       'value': [1, 2, 3, np.nan, 2, 3, np.nan, 1, 7, 3, np.nan, 8]})

|| SKIPPED (29-32) The Harder Problems 

# Series and DatetimeIndex

### Exercises for creating and manipulating Series with datetime data
##### Difficulty: easy/medium

### 33. Create a DatetimeIndex that contains each business day of 2015 and use it to index a Series of random numbers. Let's call this Series s.

In [260]:
business_days = pd.bdate_range("2015-01-01", "2015-12-31") ##Prints business days

In [261]:
values = np.random.randint(100,size=len(business_days))
values

array([90, 92, 47,  0, 82, 85, 89, 73, 12, 14, 28,  4, 44, 70, 40, 85, 28,
       59, 35, 91, 55, 84, 22, 69,  2, 64, 44, 17, 76, 88, 64, 99, 13, 52,
       19, 27, 28,  6, 36, 35, 93, 88, 67, 66, 40, 96, 37,  5, 96, 38, 13,
       72, 31, 76, 46, 17, 77, 55, 97, 46, 12, 75, 48, 90, 91, 79, 80, 58,
       31, 88, 58, 48, 82, 83, 96, 14, 23, 63,  4, 22, 76, 44, 15, 84, 32,
       93, 62,  0, 82,  9, 80, 36, 61, 73, 95,  6, 39, 36, 37, 83,  7,  4,
       58,  3, 90, 84, 27, 82, 21, 94, 19, 10, 93, 70, 81, 13, 97,  6, 39,
       75,  3, 98, 83, 62, 49, 89, 36, 48, 12,  6, 30, 38, 41, 80,  4, 49,
       31, 29, 52, 90, 15, 31, 48, 14, 31, 33, 34, 79, 89, 59,  0, 60, 81,
       92, 28,  9, 30, 33, 30, 15, 27,  5, 92, 60, 47, 21,  1, 97, 91, 95,
       59, 51, 55, 56, 23, 81, 94,  6, 55, 82, 17, 62, 84, 59, 41, 97, 33,
       19, 43, 71, 76, 16, 55, 75, 15, 34, 54, 79, 83, 29, 76, 77, 30, 22,
       33, 35, 71, 29, 24, 59, 45, 62, 33, 73, 29, 22, 63, 38, 99,  4, 49,
       15, 19, 74, 54, 84

In [262]:
s= pd.Series(values ,index = business_days )
s

2015-01-01    90
2015-01-02    92
2015-01-05    47
2015-01-06     0
2015-01-07    82
              ..
2015-12-25     7
2015-12-28    70
2015-12-29    49
2015-12-30    46
2015-12-31    23
Freq: B, Length: 261, dtype: int32

### 34. Find the sum of the values in s for every Wednesday.

In [263]:
s.index

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-05', '2015-01-06',
               '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-12',
               '2015-01-13', '2015-01-14',
               ...
               '2015-12-18', '2015-12-21', '2015-12-22', '2015-12-23',
               '2015-12-24', '2015-12-25', '2015-12-28', '2015-12-29',
               '2015-12-30', '2015-12-31'],
              dtype='datetime64[ns]', length=261, freq='B')

In [264]:
s.index.day_name()

Index(['Thursday', 'Friday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
       'Friday', 'Monday', 'Tuesday', 'Wednesday',
       ...
       'Friday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       'Monday', 'Tuesday', 'Wednesday', 'Thursday'],
      dtype='object', length=261)

In [265]:
count = 0
for index_date in s.index:
    if index_date.day_name() == "Wednesday":
        count = count+1

print("Count of Wednesdays:", count)  ##counting total wednesday 


Count of Wednesdays: 52


In [266]:
s[s.index.day_name() == "Wednesday"]

2015-01-07    82
2015-01-14    14
2015-01-21    40
2015-01-28    91
2015-02-04     2
2015-02-11    88
2015-02-18    19
2015-02-25    35
2015-03-04    40
2015-03-11    38
2015-03-18    46
2015-03-25    46
2015-04-01    91
2015-04-08    88
2015-04-15    96
2015-04-22    22
2015-04-29    32
2015-05-06     9
2015-05-13    95
2015-05-20    83
2015-05-27    90
2015-06-03    94
2015-06-10    81
2015-06-17    75
2015-06-24    49
2015-07-01     6
2015-07-08     4
2015-07-15    90
2015-07-22    31
2015-07-29    59
2015-08-05    28
2015-08-12    15
2015-08-19    47
2015-08-26    95
2015-09-02    23
2015-09-09    82
2015-09-16    41
2015-09-23    71
2015-09-30    15
2015-10-07    29
2015-10-14    33
2015-10-21    59
2015-10-28    29
2015-11-04     4
2015-11-11    54
2015-11-18    90
2015-11-25    11
2015-12-02    48
2015-12-09    68
2015-12-16    45
2015-12-23    66
2015-12-30    46
dtype: int32

In [267]:
wednesday_sum = s[s.index.day_name() == "Wednesday"].sum()
wednesday_sum 

2635

### 35. For each calendar month in s, find the mean of values.

In [268]:
months = s.index.month_name()
months

Index(['January', 'January', 'January', 'January', 'January', 'January',
       'January', 'January', 'January', 'January',
       ...
       'December', 'December', 'December', 'December', 'December', 'December',
       'December', 'December', 'December', 'December'],
      dtype='object', length=261)

In [269]:
a = s.groupby(months).mean()
a

April        57.454545
August       48.523810
December     41.956522
February     47.100000
January      54.863636
July         41.000000
June         53.636364
March        54.545455
May          46.285714
November     47.571429
October      48.272727
September    52.727273
dtype: float64

In [270]:
a.sort_values()

July         41.000000
December     41.956522
May          46.285714
February     47.100000
November     47.571429
October      48.272727
August       48.523810
September    52.727273
June         53.636364
March        54.545455
January      54.863636
April        57.454545
dtype: float64

In [271]:
a.sort_index() 

April        57.454545
August       48.523810
December     41.956522
February     47.100000
January      54.863636
July         41.000000
June         53.636364
March        54.545455
May          46.285714
November     47.571429
October      48.272727
September    52.727273
dtype: float64

In [272]:
a = a.reindex(['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'])  # Sort the mean values by month names
a

January      54.863636
February     47.100000
March        54.545455
April        57.454545
May          46.285714
June         53.636364
July         41.000000
August       48.523810
September    52.727273
October      48.272727
November     47.571429
December     41.956522
dtype: float64

In [273]:
s.resample("M").mean()

  s.resample("M").mean()


2015-01-31    54.863636
2015-02-28    47.100000
2015-03-31    54.545455
2015-04-30    57.454545
2015-05-31    46.285714
2015-06-30    53.636364
2015-07-31    41.000000
2015-08-31    48.523810
2015-09-30    52.727273
2015-10-31    48.272727
2015-11-30    47.571429
2015-12-31    41.956522
Freq: ME, dtype: float64

### 36. For each group of four consecutive calendar months in s, find the date on which the highest value occurred.