# Welcome to this Kernel For Assignement



<a id='table_of_contents'></a>
# Table of contents

[1. How to create a series from a list, numpy array and dict?](#q1)

[2. How to combine many series to form a dataframe?](#q2)

[3. How to get the items of series A not present in series B?](#q3)

[4. How to get the items not common to both series A and series B?](#q4)

[5. How to get useful infos](#q5)

[6. How to get frequency counts of unique items of a series?](#q6)

[7. How to convert a numpy array to a dataframe of given shape? (L1)](#q7)

[8. How to find the positions of numbers that are multiples of 3 from a series?](#q8)

[9. How to extract items at given positions from a series?](#q9)

[10. How to stack two series vertically and horizontally ?](#q10)

[11. How to get the positions of items of series A in another series B?](#q11)

[12. How to compute difference of differences between consequtive numbers of a series?](#q12)

[13. How to convert a series of date-strings to a timeseries?](#q13)

[14. How to filter words that contain atleast 2 vowels from a series?](#q14)

[15. How to replace missing spaces in a string with the least frequent character?](#q15)

[16. How to change column values when importing csv to a dataframe?](#q16)

[17. How to import only specified columns from a csv file?](#q17)

[18. How to check if a dataframe has any missing values?](#q18)

[19. How to replace missing values of multiple numeric columns with the mean?](#q19)

[20. How to change the order of columns of a dataframe?](#q20)

[21. How to filter every nth row in a dataframe?](#q21)

[22. How to get the last n rows of a dataframe with row sum > 100?](#q22)

[23. How to find and cap outliers from a series or dataframe column?](#q23)

[24. How to reverse the rows of a dataframe?](#q24)


In [2]:


# Allow several prints in one cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import numpy as np

# Pandas exercise

# 1. How to create a series from a list, numpy array and dict?
a_list = list("abcdefg")
numpy_array = np.arange(1, 10)
dictionary = {"A":  0, "B":1, "C":2, "D":3, "E":5}



In [3]:
# 2. How to combine many series to form a dataframe?
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))
df = pd.DataFrame({'ser1': ser1, 'ser2': ser2})



In [4]:
# 3. How to get the items of series A not present in series B?
ser1 = pd.Series([1, 2, 3,23, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
not_in_ser2 = ser1[~ser1.isin(ser2)]



In [5]:
# 4. How to get the items not common to both series A and series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
not_common = pd.concat([ser1[~ser1.isin(ser2)], ser2[~ser2.isin(ser1)]])



In [6]:
# 5. How to get useful infos
state = np.random.RandomState(100)
ser = pd.Series(state.normal(10, 5, 25))
useful_infos = ser.describe()



In [7]:
# 6. How to get frequency counts of unique items of a series?
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(5, size=30)))
freq_counts = ser.value_counts()




In [8]:
# 7. How to convert a numpy array to a dataframe of given shape? (L1)
ser = pd.Series(np.random.randint(1, 10, 35))
df_shape = pd.DataFrame(ser.values.reshape(7, 5))



In [9]:
# 8. How to find the positions of numbers that are multiples of 3 from a series?
ser = pd.Series(np.random.randint(1, 5, 10))
pos_multiple_3 = ser[ser % 3 == 0].index.tolist()



In [13]:
# 9. How to extract items at given positions from a series
pos = [0, 4, 8, 9]  # Adjusted positions to match the size of the series
items_at_pos = ser.iloc[pos]


In [15]:
# 10. How to stack two series vertically and horizontally ?
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))
stacked_vertically = pd.concat([ser1, ser2], axis=0)
stacked_horizontally = pd.concat([ser1, ser2], axis=1)



In [16]:
# 11. How to get the positions of items of series A in another series B?
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])
positions_in_ser1 = [np.where(ser1 == i)[0].tolist()[0] for i in ser2]



In [17]:
# 12. How to compute difference of differences between consequtive numbers of a series?
diff_of_diff = ser.diff().tolist()
diff_of_diff_of_diff = ser.diff().diff().tolist()


In [18]:

# 13. How to convert a series of date-strings to a timeseries?
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])
date_series = pd.to_datetime(ser)


In [19]:

# 14. How to filter words that contain atleast 2 vowels from a series?
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])
atleast_2_vowels = ser[ser.apply(lambda x: sum(1 for char in x if char.lower() in 'aeiou') >= 2)]


In [20]:

# 15. How to replace missing spaces in a string with the least frequent character?
my_str = 'dbc deb abed ggade'
least_frequent_char = pd.Series(list(my_str)).value_counts().index[-1]
replaced_str = ''.join([char if char != ' ' else least_frequent_char for char in my_str])


In [21]:

# Print results
print("1. Series from list, numpy array, and dictionary:\n", a_list, numpy_array, dictionary)
print("\n2. Combined dataframe:\n", df)
print("\n3. Items of series A not present in series B:\n", not_in_ser2)
print("\n4. Items not common to both series A and series B:\n", not_common)
print("\n5. Useful infos:\n", useful_infos)
print("\n6. Frequency counts of unique items of a series:\n", freq_counts)
print("\n7. DataFrame of given shape:\n", df_shape)
print("\n8. Positions of numbers that are multiples of 3:\n", pos_multiple_3)
print("\n9. Items at given positions from a series:\n", items_at_pos)
print("\n10. Stacked series vertically:\n", stacked_vertically)
print("\n   Stacked series horizontally:\n", stacked_horizontally)
print("\n11. Positions of items of series A in another series B:\n", positions_in_ser1)
print("\n12. Difference of differences between consequtive numbers of a series:\n", diff_of_diff)
print("\n    Difference of differences of differences:\n", diff_of_diff_of_diff)
print("\n13. Series of date-strings converted to timeseries:\n", date_series)
print("\n14. Filtered words containing atleast 2 vowels:\n", atleast_2_vowels)
print("\n15. Replaced string:\n", replaced_str)

1. Series from list, numpy array, and dictionary:
 ['a', 'b', 'c', 'd', 'e', 'f', 'g'] [1 2 3 4 5 6 7 8 9] {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 5}

2. Combined dataframe:
    ser1  ser2
0     a     0
1     b     1
2     c     2
3     e     3
4     d     4
5     f     5
6     g     6
7     h     7
8     i     8
9     j     9
10    k    10
11    l    11
12    m    12
13    n    13
14    o    14
15    p    15
16    q    16
17    r    17
18    s    18
19    t    19
20    u    20
21    v    21
22    w    22
23    x    23
24    y    24
25    z    25

3. Items of series A not present in series B:
 0     1
1     2
2     3
3    23
dtype: int64

4. Items not common to both series A and series B:
 0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64

5. Useful infos:
 count    25.000000
mean     10.435437
std       4.253118
min       1.251173
25%       7.709865
50%      10.922593
75%      13.363604
max      18.094908
dtype: float64

6. Frequency counts of unique items of a series:
 e    10
a  

In [24]:
# 16. How to change column values when importing csv to a dataframe?
df = pd.read_csv('/content/winequality.csv', converters={'type': lambda x: x.upper()})
df

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,WHITE,7.0,0.270,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,WHITE,6.3,0.300,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,WHITE,8.1,0.280,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,WHITE,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,WHITE,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6492,RED,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
6493,RED,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,,11.2,6
6494,RED,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
6495,RED,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [25]:

# 17. How to import only specified columns from a csv file?
df = pd.read_csv('/content/winequality.csv', usecols=['type', 'chlorides'])
df

Unnamed: 0,type,chlorides
0,white,0.045
1,white,0.049
2,white,0.050
3,white,0.058
4,white,0.058
...,...,...
6492,red,0.090
6493,red,0.062
6494,red,0.076
6495,red,0.075


In [29]:

# 18. How to check if a dataframe has any missing values?
has_missing_values = df.isnull().values.any()
has_missing_values_count = df.isnull().sum()
has_missing_values
has_missing_values_count

True

type         0
chlorides    2
dtype: int64

In [31]:

# 19. How to replace missing values of multiple numeric columns with the mean?
numeric_columns = df.select_dtypes(include='number').columns
df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())
print(df[numeric_columns])


      chlorides
0         0.045
1         0.049
2         0.050
3         0.058
4         0.058
...         ...
6492      0.090
6493      0.062
6494      0.076
6495      0.075
6496      0.067

[6497 rows x 1 columns]


In [34]:
# 20. How to change the order of columns of a dataframe?
new_column_order = ['chlorides', 'type']  # Specify your desired column order
df = df[new_column_order]

df

Unnamed: 0,chlorides,type
0,0.045,white
1,0.049,white
2,0.050,white
3,0.058,white
4,0.058,white
...,...,...
6492,0.090,red
6493,0.062,red
6494,0.076,red
6495,0.075,red


In [36]:
# 21. How to filter every nth row in a dataframe?
n = 2  # Specify the value of n
filtered_df = df.iloc[::n]
filtered_df


Unnamed: 0,chlorides,type
0,0.045,white
2,0.050,white
4,0.058,white
6,0.045,white
8,0.049,white
...,...,...
6488,0.077,red
6490,0.076,red
6492,0.090,red
6494,0.076,red


In [37]:
# 22. How to get the last n rows of a dataframe with row sum > 100?
n = 5  # Specify the value of n
last_n_rows_sum_gt_100 = df[df.sum(axis=1) > 100].tail(n)
last_n_rows_sum_gt_100


  last_n_rows_sum_gt_100 = df[df.sum(axis=1) > 100].tail(n)


Unnamed: 0,chlorides,type


In [39]:
# 23. How to find and cap outliers from a series or dataframe column?
from scipy import stats
z_scores = stats.zscore(df['chlorides'])
capped_df = df[(z_scores < 3) & (z_scores > -3)]
capped_df


Unnamed: 0,chlorides,type
0,0.045,white
1,0.049,white
2,0.050,white
3,0.058,white
4,0.058,white
...,...,...
6492,0.090,red
6493,0.062,red
6494,0.076,red
6495,0.075,red


In [40]:
# 24. How to reverse the rows of a dataframe?
reversed_df = df[::-1]
