# Series

In [2]:
import pandas as pd;

### Create a Series Object from a list 

* A pandas Series is a One-dimensional array.
* A series combines the best features of a list and a dictionary.
* A Series maintains a single collection of ordered values


In [3]:
ice_cream=['Chacolate', 'Vennela', "Strawberry"]
flavour=pd.Series(ice_cream, name="Flavour")

In [4]:
flavour

0     Chacolate
1       Vennela
2    Strawberry
Name: Flavour, dtype: object

### Create a Series Object from a dictionary

In [5]:
sushi={
    'Salmon':'Orange',
    'Tuna':'red',
    'Eel':'Brown'
}
pd.Series(sushi)
    

Salmon    Orange
Tuna         red
Eel        Brown
dtype: object

### Intro to Series Mathods

In [6]:
prices=pd.Series([2.99, 4.55,1.36])

In [7]:
prices

0    2.99
1    4.55
2    1.36
dtype: float64

In [8]:
prices.sum()

8.9

In [9]:
prices.product()

18.50212

In [10]:
prices.mean()

2.966666666666667

### Intro to Attributes
* An Attribute is a piece that lives on an object.
* An attribute is a fact, a detail, a characteristic of the object.
* Access an attribute with object.attribute syntax.

In [11]:
adj=pd.Series(['Smart', 'Hamdsome','charming'])

In [12]:
adj.size

3

In [13]:
adj.is_unique

True

In [14]:
adj.index

RangeIndex(start=0, stop=3, step=1)

In [15]:
adj.values

array(['Smart', 'Hamdsome', 'charming'], dtype=object)

In [16]:
type(adj.values)

numpy.ndarray

### Parameters and Arguments

In [17]:
fruits=['apple','mango', 'berry','orange']
weekdays=['monday','tuesday', 'wednesday', 'thursday']

In [18]:
pd.Series(fruits)

0     apple
1     mango
2     berry
3    orange
dtype: object

In [19]:
pd.Series(data=fruits, index=weekdays, name="daily")

monday        apple
tuesday       mango
wednesday     berry
thursday     orange
Name: daily, dtype: object

# Import Series with the pd.read_csv Function
* A CSV is a plaint text file that uses the breaks to separate rows and columns to separate row values.
* Pandas ships with many different read_ functions for different types of files.
* The read_csv function accepts many different parameters, The first one specifies the file name/path.
* The usecols parameter accepts a list of the columns to import
* The squeeze method converst a DatFrame to a series

In [20]:
pokemon=pd.read_csv('pokemon.csv', usecols=['Name']).squeeze('columns')

In [21]:
pokemon

0          Bulbasaur
1            Ivysaur
2           Venusaur
3         Charmander
4         Charmeleon
            ...     
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, Length: 1010, dtype: object

In [22]:
pokemon.loc[3]['Type']

TypeError: string indices must be integers, not 'str'

# The head and tail methods
* The head method returns a number of rows from the beginning of the series.
* The tail method returns a number of rows from the bottom of the series.

In [None]:
pokemon.head()

In [None]:
pokemon.head(10)

In [None]:
pokemon.tail()

In [None]:
pokemon.tail(10)

# Passing Series to Python's Built-in Functions
* The len function returns the length of th Series.
* The type function returns the type of an object.
* The list function converts the Series to a list.
* The dict function converts the series to a dictionary.
* The sorted function converts the Series to a sorted list.
* The max and min functions returns the largest and smallest value in the Series.

In [None]:
len(pokemon)

In [None]:
type(pokemon)

In [None]:
list(pokemon)

In [None]:
dict(pokemon)

In [None]:
sorted(pokemon)

In [None]:
max(pokemon)

In [None]:
min(pokemon)

# Check for inclusion with Python's in keyword
* The in keyword checks if a value exists within an object.
* The in keyword will look for a values in the Series's index.
* Use the index and values attributes to access 'nested' objects within the Series.
* Combine the in keyword with values to search within the Series's values.


In [None]:
2 in [1,2]

In [None]:
"car" in "racecar"

In [None]:
pokemon

In [None]:
'Bulbasaur' in pokemon.values

In [None]:
pokemon.values

### The Sort_values method
* Sorts a Series by its values.
* By default it is set to ascending.

In [None]:
pokemon.sort_values()

In [None]:
pokemon.sort_values(ascending=False)

In [None]:
pokemon.sort_values()

In [None]:
pokemon.sort_index().head()

# Extract Series Value b Index Position
* Use the iloc accessor to extract a Series value by its index position.
* iloc is short for "index location".
  

In [None]:
pokemon.iloc[0:5]

In [None]:
pokemon.iloc[9]

In [None]:
pokemon.iloc[[100, 200, 300]]

In [23]:
pokemon.iloc[-1]

'Iron Leaves'

In [25]:
pokemon.iloc[-8:]

1002         Ting-Lu
1003          Chi-Yu
1004    Roaring Moon
1005    Iron Valiant
1006        Koraidon
1007        Miraidon
1008    Walking Wake
1009     Iron Leaves
Name: Name, dtype: object

In [26]:
pokemon[[12, 133, 19]]

12       Weedle
133    Vaporeon
19     Raticate
Name: Name, dtype: object

In [31]:
pokemon.iloc[9:13]

9       Caterpie
10       Metapod
11    Butterfree
12        Weedle
Name: Name, dtype: object

# Extract Series value by Index Label

In [32]:
pokemon=pd.read_csv('pokemon.csv', index_col="Name").squeeze('columns')

In [33]:
pokemon.head()

Name
Bulbasaur     Grass, Poison
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Name: Type, dtype: object

In [34]:
pokemon.loc['Ivysaur']

'Grass, Poison'

In [35]:
pokemon.iloc[2]

'Grass, Poison'

In [36]:
pokemon[0]

'Grass, Poison'

In [41]:
pokemon.loc['Bulbasaur':'Charmeleon']

Name
Bulbasaur     Grass, Poison
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Name: Type, dtype: object

In [43]:
pokemon[0:5]

Name
Bulbasaur     Grass, Poison
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Name: Type, dtype: object

# The get method on a Series
* The get method extracts a Series value by index label. It is an alternative option to square brackets.
* The get method's second argument sets the fallback value to return if the label/position does not exist.

In [45]:
pokemon.get('Bulbasaur')

'Grass, Poison'

In [48]:
pokemon.get('Maki')

In [49]:
pokemon['Maki']

KeyError: 'Maki'

In [50]:
pokemon.get("Maki","Nonexistent")

'Nonexistent'

In [51]:
pokemon.get("Bulbasaur","Nonexistent")

'Grass, Poison'

In [52]:
pokemon.get(0)

'Grass, Poison'

# Overwrite a Series value
* Use the loc/iloc accessor to largets an index label/position , then use an equal sign to provide a new value.

In [54]:
pokemon.loc['Bulbasaur']="Maki"

In [55]:
pokemon.head()

Name
Bulbasaur              Maki
Ivysaur       Grass, Poison
Venusaur      Grass, Poison
Charmander             Fire
Charmeleon             Fire
Name: Type, dtype: object

In [56]:
pokemon['Bulbasaur']

'Maki'

In [59]:
pokemon.iloc[[1,2 ,4]]=["maki","qwer","wqew"]

In [60]:
pokemon.head()

Name
Bulbasaur     Maki
Ivysaur       maki
Venusaur      qwer
Charmander    Fire
Charmeleon    wqew
Name: Type, dtype: object

In [61]:
pokemon.loc["Ivysaur"]="i"

In [62]:
pokemon[["Bulbasaur", "Ivysaur"]]

Name
Bulbasaur    Maki
Ivysaur         i
Name: Type, dtype: object

In [63]:
pokemon[0]

'Maki'

# The Copy method
* A *copy* is a duplicate/replica of an object.
* Changes to a copy do not modify the original object.
* A *view* is a different way of looking at the same data.
* Changes to a view do modify the original object.

In [76]:
pokemon_df=pd.read_csv('pokemon.csv', usecols=["Name"])

In [77]:
pokemon_series=pokemon_df.squeeze("columns").copy()

In [78]:
type(pokemon_series)

pandas.core.series.Series

In [79]:
pokemon_series[0]="whatever"

In [80]:
pokemon_series.head()

0      whatever
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Name, dtype: object

In [81]:
pokemon_df.head()

Unnamed: 0,Name
0,Bulbasaur
1,Ivysaur
2,Venusaur
3,Charmander
4,Charmeleon


# Math Methods on Series Objects
* count- returns the number of values in the Series. It excludes missing values,  the *size* includes missing values.
* sum, product, mean, std, max, min, median, mode.
* describe - returns  a summary with various mathematical calculations.

# Broadcasting
* **Broadcasting** describes the process of applying an arithemetic operation to an array.(i.e., a **Series**)
* We can combine mathematical opearations with a **Series** to apply the mathematical opearation to every value.
* There are also methods to accomplish the same results (*add*, *sub*, *mul*, *div*).

In [82]:
google=pd.read_csv('google_stock_price.csv', usecols=["Price"]).squeeze("columns")

In [84]:
google.head()

0    2.490664
1    2.515820
2    2.758411
3    2.770615
4    2.614201
Name: Price, dtype: float64

In [85]:
google+ 10

0        12.490664
1        12.515820
2        12.758411
3        12.770615
4        12.614201
           ...    
4788    142.080002
4789    142.998001
4790    145.570007
4791    147.050003
4792    148.429993
Name: Price, Length: 4793, dtype: float64

In [86]:
google.add(10)

0        12.490664
1        12.515820
2        12.758411
3        12.770615
4        12.614201
           ...    
4788    142.080002
4789    142.998001
4790    145.570007
4791    147.050003
4792    148.429993
Name: Price, Length: 4793, dtype: float64

In [89]:
google.sub(10)

0        -7.509336
1        -7.484180
2        -7.241589
3        -7.229385
4        -7.385799
           ...    
4788    122.080002
4789    122.998001
4790    125.570007
4791    127.050003
4792    128.429993
Name: Price, Length: 4793, dtype: float64

In [90]:
google

0         2.490664
1         2.515820
2         2.758411
3         2.770615
4         2.614201
           ...    
4788    132.080002
4789    132.998001
4790    135.570007
4791    137.050003
4792    138.429993
Name: Price, Length: 4793, dtype: float64

In [91]:
google-10

0        -7.509336
1        -7.484180
2        -7.241589
3        -7.229385
4        -7.385799
           ...    
4788    122.080002
4789    122.998001
4790    125.570007
4791    127.050003
4792    128.429993
Name: Price, Length: 4793, dtype: float64

In [93]:
google*10
google.mul(10)

0         24.90664
1         25.15820
2         27.58411
3         27.70615
4         26.14201
           ...    
4788    1320.80002
4789    1329.98001
4790    1355.70007
4791    1370.50003
4792    1384.29993
Name: Price, Length: 4793, dtype: float64

In [96]:
google/2
google.div(2)


0        1.245332
1        1.257910
2        1.379206
3        1.385307
4        1.307100
          ...    
4788    66.040001
4789    66.499000
4790    67.785004
4791    68.525002
4792    69.214996
Name: Price, Length: 4793, dtype: float64

# The value_counts Method
* The **value_counts** method returns the number of times each unique value occurs in the **Series**.
* The **normalize** parameter returns the relative frequencies/percentages of the values instaed of the counts.

In [100]:
google.count()

4793

In [108]:
google.value_counts(ascending=True, normalize=True)

Price
2.490664     0.000209
41.775501    0.000209
41.619999    0.000209
41.397999    0.000209
41.269501    0.000209
               ...   
11.457056    0.000626
39.000000    0.000626
14.022440    0.000626
14.719826    0.000835
49.000000    0.000835
Name: proportion, Length: 4652, dtype: float64

# The apply method
* The **apply** method accepts a function. It invokes that function on every **Series** value.

In [109]:
pokemon.apply(len)

Name
Bulbasaur        4
Ivysaur          1
Venusaur         4
Charmander       4
Charmeleon       4
                ..
Iron Valiant    15
Koraidon        16
Miraidon        16
Walking Wake    13
Iron Leaves     14
Name: Type, Length: 1010, dtype: int64

In [118]:
def count_of_a(text):
    return text.count("a")

pokemon.apply(count_of_a)

In [120]:
pokemon

Name
Bulbasaur                   Maki
Ivysaur                        i
Venusaur                    qwer
Charmander                  Fire
Charmeleon                  wqew
                      ...       
Iron Valiant     Fairy, Fighting
Koraidon        Fighting, Dragon
Miraidon        Electric, Dragon
Walking Wake       Water, Dragon
Iron Leaves       Grass, Psychic
Name: Type, Length: 1010, dtype: object

# The map Method
* The **map** method 'maps' or connects each **Series** values to another value.
* We can pass the method a dictionary or a Series. Bothtypes connects keys to values.
* The **map** method uses our argument to connect or bridge together the values.

In [121]:
import pandas as pd

# Series.map()
series = pd.Series(['apple', 'banana', 'orange'])

# Using a dictionary to map values
fruit_dict = {'apple': 'red', 'banana': 'yellow', 'orange': 'orange'}
series_mapped = series.map(fruit_dict)

# Using a function to map values
def get_length(value):
    return len(value)

series_length = series.map(get_length)


In [122]:
series_length

0    5
1    6
2    6
dtype: int64

In [124]:
series_length = series.map(get_length)

In [125]:
series_length

0    5
1    6
2    6
dtype: int64

In [126]:
series_length = series.apply(get_length)

In [127]:
series_length

0    5
1    6
2    6
dtype: int64

In [128]:
add_lambda=lambda x,y:x+y

In [129]:
add_lambda(3,4)

7

In [130]:
numbers = [1, 2, 3, 4, 5]
squared_numbers = list(map(lambda x: x**2, numbers))
print(squared_numbers)  # Output: [1, 4, 9, 16, 25]


[1, 4, 9, 16, 25]


In [131]:
students = [('Alice', 22), ('Bob', 18), ('Charlie', 25)]
sorted_students = sorted(students, key=lambda student: student[1])


In [132]:
sorted_students

[('Bob', 18), ('Alice', 22), ('Charlie', 25)]

In [137]:
google.count()

4793