In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pyodbc
import sqlalchemy
import sqlite3
from subprocess import check_output
import os

%sql sqlite://

'Connected: @None'

In [2]:
actor = pd.read_csv('/kaggle/input/data-sakila-sql/actor.txt', sep = ';')
category = pd.read_csv('/kaggle/input/data-sakila-sql/category.txt', sep = ';')
customer = pd.read_csv('/kaggle/input/data-sakila-sql/customer.txt', sep = ';')
film = pd.read_csv('/kaggle/input/data-sakila-sql/film.txt', sep = ';')
film_cat = pd.read_csv('/kaggle/input/data-sakila-sql/film_category.txt', sep = ';')
inventory = pd.read_csv('/kaggle/input/data-sakila-sql/inventory.txt', sep = ';')
rental = pd.read_csv('/kaggle/input/data-sakila-sql/rental.txt', sep = ';')

In [3]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:////sakila', echo=False)

actor.to_sql('actor', con = engine)
category.to_sql('category', con = engine)
customer.to_sql('customer', con = engine)
film.to_sql('film', con = engine)
film_cat.to_sql('film_category', con = engine)
inventory.to_sql('inventory', con = engine)
rental.to_sql('rental', con = engine)

# PRACTICEs

This `kernel/notebook` includes:

> (i) **`Reformating`** strings & characters
> 
> (ii) **`Parsing`** string & characters
>
> (iii) **`Determine**` string **`length`** & character **`position`**
>
> (iv) **`Truncating & padding`** string data.

## 1. Reformatting string and character data.

#### 1.1. The `string concatenation operator`
Syntax:  `SELECT text_1` || ' ' || `text_2` AS `concatenate_text`

For example,

In [4]:
pd.read_sql(
    """
        SELECT first_name, last_name,
               first_name || ' ' || last_name AS full_name
        FROM customer
        LIMIT 5
    """, con = engine)

Unnamed: 0,first_name,last_name,full_name
0,MARY,SMITH,MARY SMITH
1,PATRICIA,JOHNSON,PATRICIA JOHNSON
2,LINDA,WILLIAMS,LINDA WILLIAMS
3,BARBARA,JONES,BARBARA JONES
4,ELIZABETH,BROWN,ELIZABETH BROWN


#### Using `CONCAT` function to concatenate 2 `strings`

Do the same thing as the preceding; but remove the `"||"`. Syntax

                CONCAT(text1, text2, text3)
For example,

In [5]:
pd.read_sql(
    """
        SELECT first_name, last_name,
               CONCAT(first_name, ' ' , last_name) AS full_name
        FROM customer
        LIMIT 5
    """, con = engine)

Unnamed: 0,first_name,last_name,full_name
0,MARY,SMITH,MARY SMITH
1,PATRICIA,JOHNSON,PATRICIA JOHNSON
2,LINDA,WILLIAMS,LINDA WILLIAMS
3,BARBARA,JONES,BARBARA JONES
4,ELIZABETH,BROWN,ELIZABETH BROWN


#### 1.2. String concatenation with a `non-string` input
In this example, we consider a `non-string` input hereby the `integer` from the `customer_id` column. 

Firstly, how can we join these without available function.

In [6]:
pd.read_sql(
    """
        SELECT customer_id, first_name, last_name,
               customer_id || ': ' || first_name || ' ' || last_name AS full_name
        FROM customer
        LIMIT 5
    """, con = engine)

Unnamed: 0,customer_id,first_name,last_name,full_name
0,1,MARY,SMITH,1: MARY SMITH
1,2,PATRICIA,JOHNSON,2: PATRICIA JOHNSON
2,3,LINDA,WILLIAMS,3: LINDA WILLIAMS
3,4,BARBARA,JONES,4: BARBARA JONES
4,5,ELIZABETH,BROWN,5: ELIZABETH BROWN


and using `CONCAT` again.

In [7]:
pd.read_sql(
    """ 
       SELECT customer_id, first_name, last_name,
              CONCAT(customer_id, ':', first_name, ' ', last_name) AS full_name
    """, con = engine)

Unnamed: 0,customer_id,first_name,last_name,full_name
0,1,MARY,SMITH,1: MARY SMITH
1,2,PATRICIA,JOHNSON,2: PATRICIA JOHNSON
2,3,LINDA,WILLIAMS,3: LINDA WILLIAMS
3,4,BARBARA,JONES,4: BARBARA JONES
4,5,ELIZABETH,BROWN,5: ELIZABETH BROWN


#### 1.3. Changing the `CASE` of strings

We always have **uppercase** and **lowercase** for any `string`, and in `SQL`, we call the functions **`UPPER(str)`** and **`LOWER(str)`**, respectively.

Moreover, to ***capitalize only the first character of each word***, we use the function **`INITCAP()`**

In [8]:
pd.read_sql(
    """
        SELECT email, UPPER(email), LOWER(email), INITCAP(email)
        FROM customer
        LIMIT 5
    """, con = engine)

Unnamed: 0,email,upper,lower,initcap
0,MARY.SMITH@sakilacustomer.org,MARY.SMITH@SAKILACUSTOMER.ORG,mary.smith@sakilacustomer.org,Mary.Smith@Sakilacustomer.Org
1,PATRICIA.JOHNSON@sakilacustomer.org,PATRICIA.JOHNSON@SAKILACUSTOMER.ORG,patricia.johnson@sakilacustomer.org,Patricia.Johnson@Sakilacustomer.Org
2,LINDA.WILLIAMS@sakilacustomer.org,LINDA.WILLIAMS@SAKILACUSTOMER.ORG,linda.williams@sakilacustomer.org,Linda.Williams@Sakilacustomer.Org


#### 1.4. Replacing characters in a string.

Syntax:     `REPLACE(columns, org_strings, new_strings)`

In [9]:
pd.read_sql(
    """
        SELECT description,
               REPLACE(description, 'A Astounding', 'An outstanding') AS corrected_text
        FROM film
        WHERE film_id = 63
    """, con = engine)

Unnamed: 0,description,corrected_text
0,A Astounding Character Study of a Madman And a...,An outstanding Character Study of a Madman And...


Now, there are many `article_grammar-mistakes` in the context of the column `description`!

For example, at the `film_id = 63`, the original_description is `A As....` but this must be `An A....`

In [10]:
pd.read_sql(
    """
        SELECT description, 
               REPLACE(
                 REPLACE(
                    REPLACE(
                       REPLACE(
                         REPLACE(description, 
                                'A A', 'An a'), 
                                   'A E', 'An e'), 
                                      'A I', 'An i'), 
                                         'A O', 'An o'), 
                                             'A U', 'An u') AS correct_article
        FROM film
        LIMIT 10
    """, con = engine)

Unnamed: 0,description,correct_article
0,A Fateful Display of a Womanizer And a Mad Sci...,A Fateful Display of a Womanizer And a Mad Sci...
1,A Awe-Inspiring Epistle of a Student And a Squ...,An awe-Inspiring Epistle of a Student And a Sq...
2,A Astounding Character Study of a Madman And a...,An astounding Character Study of a Madman And ...
3,A Unbelieveable Drama of a Student And a Husba...,An unbelieveable Drama of a Student And a Husb...
4,A Emotional Character Study of a Boat And a Pi...,An emotional Character Study of a Boat And a P...
5,A Emotional Reflection of a Teacher And a Man ...,An emotional Reflection of a Teacher And a Man...
6,A Fast-Paced Saga of a Frisbee And a Astronaut...,A Fast-Paced Saga of a Frisbee And a Astronaut...
7,A Insightful Documentary of a Boat And a Compo...,An insightful Documentary of a Boat And a Comp...
8,A Brilliant Epistle of a Teacher And a Sumo Wr...,A Brilliant Epistle of a Teacher And a Sumo Wr...
9,A Awe-Inspiring Panorama of a Crocodile And a ...,An awe-Inspiring Panorama of a Crocodile And a...


#### 1.5. Manipulating string data with `REVERSE`

The function `REVERSE` works as following,

In [11]:
pd.read_sql(
    """ 
        SELECT title, REVERSE(title) AS reverse_tit
        FROM film
        LIMIT 5
    """, con = engine)

Unnamed: 0,title,reverse_tit
0,BEACH HEARTBREAKERS,SREKAERBTRAEH HCAEB
1,BEAST HUNCHBACK,KCABHCNUH TSAEB
2,BEDAZZLED MARRIED,DEIRRAM DELZZADEB
3,BEHAVIOR RUNAWAY,YAWANUR ROIVAHEB
4,BETRAYED REAR,RAER DEYARTEB


### EXERCISEs
#### Exercise 1.1. Concatenating strings
In this exercise and the ones that follow, we are going to derive new fields from columns within the customer and film tables of the `DVD rental database`.

We'll start with the customer table and create a query to return the `customers name` and `email address` formatted such that we could use it as a `"To"` field in an email script or program. This format will look like the following:

            Brian Piccolo <bpiccolo@datacamp.com>

In the `first step` of the exercise, use the `||` `operator` to do the string concatenation and in the second step, use the `CONCAT()` functions.

#### Instructions
**Step 1.** Concatenate the `first_name` and `last_name` columns separated by a single space followed by email surrounded by `<` and `>`.

Display the first 10 values.

In [12]:
pd.read_sql(
    """
        SELECT first_name || ' ' || last_name || ' <' || email || '>' AS full_email 
        FROM customer
        LIMIT 10
    """, con = engine)

Unnamed: 0,full_email
0,MARY SMITH <MARY.SMITH@sakilacustomer.org>
1,PATRICIA JOHNSON <PATRICIA.JOHNSON@sakilacusto...
2,LINDA WILLIAMS <LINDA.WILLIAMS@sakilacustomer....
3,BARBARA JONES <BARBARA.JONES@sakilacustomer.org>
4,ELIZABETH BROWN <ELIZABETH.BROWN@sakilacustome...
5,JENNIFER DAVIS <JENNIFER.DAVIS@sakilacustomer....
6,MARIA MILLER <MARIA.MILLER@sakilacustomer.org>
7,SUSAN WILSON <SUSAN.WILSON@sakilacustomer.org>
8,MARGARET MOORE <MARGARET.MOORE@sakilacustomer....
9,DOROTHY TAYLOR <DOROTHY.TAYLOR@sakilacustomer....


**Step 2.** Now use the `CONCAT()` function to do the `same operation` as the previous step.

In [13]:
pd.read_sql(
    """ 
    SELECT CONCAT(first_name, ' ', last_name,  ' <', email, '>') AS full_email 
    FROM customer LIMIT 10
    """, con = engine)

Unnamed: 0,full_email
0,MARY SMITH <MARY.SMITH@sakilacustomer.org>
1,PATRICIA JOHNSON <PATRICIA.JOHNSON@sakilacusto...
2,LINDA WILLIAMS <LINDA.WILLIAMS@sakilacustomer....
3,BARBARA JONES <BARBARA.JONES@sakilacustomer.org>
4,ELIZABETH BROWN <ELIZABETH.BROWN@sakilacustome...
5,JENNIFER DAVIS <JENNIFER.DAVIS@sakilacustomer....
6,MARIA MILLER <MARIA.MILLER@sakilacustomer.org>
7,SUSAN WILSON <SUSAN.WILSON@sakilacustomer.org>
8,MARGARET MOORE <MARGARET.MOORE@sakilacustomer....
9,DOROTHY TAYLOR <DOROTHY.TAYLOR@sakilacustomer....


#### Exercise 1.2. Changing the case of string data
Now you are going to use the film and category tables to create a new field called `film_category` by concatenating the `category name` with the `film's title`. 

You will also format the result using functions you learned about in the preceding_part to transform the case of the fields you are selecting in the query; for example, the **`INITCAP()`** function which converts a string to title case.

#### Instructions
Convert the `film category` name to `uppercase`.

Convert the `first letter` of `each word` in the film's title to `upper case`.

Concatenate the converted `category name` and `film title` separated by a `colon`.

Convert the `description` column to lowercase then limit them to the first 25 values.

In [14]:
pd.read_sql(
    """
    SELECT UPPER(c.name)  || ': ' || INITCAP(f.title) AS film_category, 
           LOWER(f.description) AS description
    FROM film AS f 
        INNER JOIN film_category AS fc 
            ON f.film_id = fc.film_id 
          INNER JOIN category AS c 
            ON fc.category_id = c.category_id
    LIMIT 25;
    """, con = engine)

Unnamed: 0,film_category,description
0,DOCUMENTARY: Beach Heartbreakers,a fateful display of a womanizer and a mad sci...
1,CLASSICS: Beast Hunchback,a awe-inspiring epistle of a student and a squ...
2,FAMILY: Bedazzled Married,a astounding character study of a madman and a...
3,HORROR: Behavior Runaway,a unbelieveable drama of a student and a husba...
4,CHILDREN: Betrayed Rear,a emotional character study of a boat and a pi...
5,FAMILY: Bilko Anonymous,a emotional reflection of a teacher and a man ...
6,MUSIC: Birdcage Casper,a fast-paced saga of a frisbee and a astronaut...
7,FAMILY: Blues Instinct,a insightful documentary of a boat and a compo...
8,ANIMATION: Borrowers Bedazzled,a brilliant epistle of a teacher and a sumo wr...
9,SPORTS: Bubble Grosse,a awe-inspiring panorama of a crocodile and a ...


#### Exercise 1.3. Replacing string data
Sometimes you will need to make sure that the data you are extracting does not contain any whitespace. There are many different approaches you can take to cleanse and prepare your data for these situations. A common technique is to replace any whitespace with an underscore.

In this example, we are going to practice finding and replacing `whitespace characters` in the 25 first values from `title` column of the `film` table using the **`REPLACE()`** function.

#### Instructions
Replace all `whitespace` with an `underscore`.

In [15]:
pd.read_sql(
    """ 
        SELECT REPLACE(title, ' ', '_') AS title
        FROM film
        LIMIT 25
    """, con = engine)

Unnamed: 0,title
0,BEACH_HEARTBREAKERS
1,BEAST_HUNCHBACK
2,BEDAZZLED_MARRIED
3,BEHAVIOR_RUNAWAY
4,BETRAYED_REAR
5,BILKO_ANONYMOUS
6,BIRDCAGE_CASPER
7,BLUES_INSTINCT
8,BORROWERS_BEDAZZLED
9,BUBBLE_GROSSE


## 2. Parsing string and character data

#### 2.1. Determining the length of string.

Syntax:     `CHAR_LENGTH(strings)`

and equivalent to

                    LENGTH(strings)

For example,

In [16]:
pd.read_sql(
    """ 
        SELECT title, 
               CHAR_LENGTH(title) AS char_len,
               LENGTH(title) AS length
        FROM film LIMIT 5
    """, con = engine)

Unnamed: 0,title,char_len,length
0,BEACH HEARTBREAKERS,19,19
1,BEAST HUNCHBACK,15,15
2,BEDAZZLED MARRIED,17,17
3,BEHAVIOR RUNAWAY,16,16
4,BETRAYED REAR,13,13


#### 2.2. Finding a position of a character in a string.

For example; we want to find the position of `@` in the `email` from the table `customer`,

In [17]:
pd.read_sql(
    """
        SELECT email, POSITION('@' IN email), LENGTH(email)
        FROM customer
        LIMIT 8
    """, con = engine)

Unnamed: 0,email,length,position('@' in email),"strpos(email, '@')"
0,MARY.SMITH@sakilacustomer.org,29,11,11
1,PATRICIA.JOHNSON@sakilacustomer.org,35,17,17
2,LINDA.WILLIAMS@sakilacustomer.org,33,15,15
3,BARBARA.JONES@sakilacustomer.org,32,14,14
4,ELIZABETH.BROWN@sakilacustomer.org,34,16,16
5,JENNIFER.DAVIS@sakilacustomer.org,33,15,15
6,MARIA.MILLER@sakilacustomer.org,31,13,13
7,SUSAN.WILSON@sakilacustomer.org,31,13,13 ...


#### 2.3. Parsing string data.

| **Functions & syntax** | **Usages** |
|------------------------|------------|
| `LEFT(string, integer)`| Extract the first `N` integer characters ***from the left*** of the given string | 
|`RIGHT(string, integer)`| Extract the first `N` integer characters ***from the right*** of the given string |

For example,

In [18]:
pd.read_sql(
    """ 
        SELECT description,
               LEFT(description, 60),
               RIGHT(description, 60)
        FROM film
        LIMIT 3
    """, con = engine)

Unnamed: 0,description,left_60,right_60
0,A Fateful Display of a Womanizer And a Mad Sci...,A Fateful Display of a Womanizer And a Mad Sci...,a Mad Scientist who must Outgun a A Shark in ...
1,A Awe-Inspiring Epistle of a Student And a Squ...,A Awe-Inspiring Epistle of a Student And a Squ...,tudent And a Squirrel who must Defeat a Boy in...
2,A Astounding Character Study of a Madman And a...,A Astounding Character Study of a Madman And a...,obot who must Meet a Mad Scientist in An Aband...


#### 2.4. Extracting the substring from a given string.

We can use both `SUBSTRING` and `SUBSTR` with the same syntax

                    function(strings, starting_position, ending_position)

For example,

In [19]:
pd.read_sql(
    """ 
        SELECT email,
                SUBSTRING(email, 10, 20) AS use_substring_from10th_to20th,
                SUBSTR(email, 10, 20) AS use_substr_from10th_to20th,
                SUBSTRING(email FROM 0 FOR POSITION('@' IN email) ) AS subtring_bef_em,
                SUBSTRING(email FROM POSITION('@' IN email)+1 FOR LENGTH(email) ) AS subtring_aft_em
        FROM customer
        LIMIT 3
    """, con = engine)

Unnamed: 0,email,use_substring_from10th_to20th,use_substr_from10th_to20th,subtring_bef_em,subtring_aft_em
0,MARY.SMITH@sakilacustomer.org,H@sakilacustomer.org,H@sakilacustomer.org,MARY.SMITH,sakilacustomer.org
1,PATRICIA.JOHNSON@sakilacustomer.org,JOHNSON@sakilacustom,JOHNSON@sakilacustom,PATRICIA.JOHNSON,sakilacustomer.org
2,LINDA.WILLIAMS@sakilacustomer.org,LIAMS@sakilacustomer,LIAMS@sakilacustomer,LINDA.WILLIAMS,sakilacustomer.org


But ***noting that the function SUBSTR()*** can not use belong with `FROM, FOR` and `POSITION, LENGTH` insight as in the `SUBSTRING`.

Indeed, when typing

                SELECT SUBSTR(email FROM 0 FOR POSITION('@' IN email) )
                FROM customer
we will receive a following warning:

                    syntax error at or near "FROM"
                    LINE 2:         SUBSTR(email FROM 0 FOR POSITION('@' IN email) )
                                                 ^

### EXERCISEs

#### Exercise 2.1. Determining the length of strings
Determining the number of characters in a string is something that you will use frequently when working with data in a `SQL` database. Many situations will require you to find the length of a string stored in your database. 

For example, you may need to limit the number of characters that are displayed in an application or you may need to ensure that a column in your dataset contains values that are all the same length. In this example, we are going to determine the length of the description column in the film table of the DVD Rental database.

#### Instructions
Select the title and description columns from the `film` table.

Find the number of `characters` in the `description` column with the alias `desc_len`.

In [20]:
pd.read_sql(
    """ 
        SELECT title, description,
               LENGTH(description) AS desc_len
        FROM film 
        LIMIT 29
    """, con = engine)

Unnamed: 0,title,description,desc_len
0,BEACH HEARTBREAKERS,A Fateful Display of a Womanizer And a Mad Sci...,96
1,BEAST HUNCHBACK,A Awe-Inspiring Epistle of a Student And a Squ...,90
2,BEDAZZLED MARRIED,A Astounding Character Study of a Madman And a...,108
3,BEHAVIOR RUNAWAY,A Unbelieveable Drama of a Student And a Husba...,91
4,BETRAYED REAR,A Emotional Character Study of a Boat And a Pi...,92
5,BILKO ANONYMOUS,A Emotional Reflection of a Teacher And a Man ...,99
6,BIRDCAGE CASPER,A Fast-Paced Saga of a Frisbee And a Astronaut...,92
7,BLUES INSTINCT,A Insightful Documentary of a Boat And a Compo...,113
8,BORROWERS BEDAZZLED,A Brilliant Epistle of a Teacher And a Sumo Wr...,100
9,BUBBLE GROSSE,A Awe-Inspiring Panorama of a Crocodile And a ...,88


#### Exercise 2.2. Truncating strings
In the previous exercise, you calculated the length of the `description` column and noticed that the number of `characters` varied but ***most of the results were over 75 characters***. 

There will be many times when you need to truncate a text column to a certain length to meet specific criteria for an application. In this exercise, we will practice getting the `first 50 characters` of the `description` column.

#### Instructions
Select the `first 50 characters` of the `description` column with the alias `short_desc`.

In [21]:
pd.read_sql(
    """ 
        SELECT LEFT(description, 50) AS short_desc
        FROM  film AS f
        LIMIT 33
    """, con = engine)

Unnamed: 0,short_desc
0,A Fateful Display of a Womanizer And a Mad Sci...
1,A Awe-Inspiring Epistle of a Student And a Squ...
2,A Astounding Character Study of a Madman And a...
3,A Unbelieveable Drama of a Student And a Husba...
4,A Emotional Character Study of a Boat And a Pi...
5,A Emotional Reflection of a Teacher And a Man ...
6,A Fast-Paced Saga of a Frisbee And a Astronaut...
7,A Insightful Documentary of a Boat And a Compo...
8,A Brilliant Epistle of a Teacher And a Sumo Wr...
9,A Awe-Inspiring Panorama of a Crocodile And a ...


#### Exercise 2.3. Extracting substrings from text data
In this exercise, you are going to practice how to extract substrings from text columns. The `Sakila database` contains the `address` table which stores the `street address` for all the `rental store locations`. 

You need a list of all the `street names` where the stores are located but the `address column` also contains the `street number`. You'll use several functions that you've learned about in the video to manipulate the `address column` and return only the `street address`.

#### Instructions
Extract only the `street address` **without the street number** from the `address column`.

Use functions to determine the `starting` and `ending` position parameters.

In [22]:
pd.read_sql(
    """ 
        SELECT SUBSTRING(address FROM POSITION(' ' IN address)+1 FOR LENGTH(address)) AS st_address
        FROM  address
        LIMIT 45;
    """, con = engine)

Unnamed: 0,st_address
0,MySakila Drive
1,MySQL Boulevard
2,Workhaven Lane
3,Lillydale Drive
4,Hanoi Way
5,Loja Avenue
6,Joliet Street
7,Inegl Manor
8,Idfu Parkway
9,Santiago de Compostela Way


Nice! The **`SUBSTRING()`** function is useful when you need to `parse substrings` from the middle of text data and as you can see can be powerful when combined with the **`POSITION()`** and **`LENGTH()`** functions.

#### Exercise 2.4. Combining functions for string manipulation
In the next example, we are going to break apart the email column from the customer table into three new derived fields. `Parsing a single` column into multiple columns can be useful when you need to work with certain subsets of data. 

`Email addresses` have embedded information stored in them that can be parsed out to derive additional information about our data. 

For example, we can use the techniques we learned about in this file **to determine how many of our customers use an email from a specific domain**.

#### Instructions
Extract the `characters` to the **`left of the @ of the email`** column in the customer table and alias it as `username`.

Now use **`SUBSTRING`** to extract the characters after the `@` of the `email column` and alias the new derived `field` as domain.

In [23]:
pd.read_sql(
    """ 
        SELECT LEFT(email, POSITION('@' IN email)-1) AS username,  
               SUBSTRING(email FROM POSITION('@' IN email)+1 FOR LENGTH(email)) AS domain
        FROM customer
        LIMIT 39
    """, con = engine)

Unnamed: 0,username,domain
0,MARY.SMITH,sakilacustomer.org
1,PATRICIA.JOHNSON,sakilacustomer.org
2,LINDA.WILLIAMS,sakilacustomer.org
3,BARBARA.JONES,sakilacustomer.org
4,ELIZABETH.BROWN,sakilacustomer.org
5,JENNIFER.DAVIS,sakilacustomer.org
6,MARIA.MILLER,sakilacustomer.org
7,SUSAN.WILSON,sakilacustomer.org
8,MARGARET.MOORE,sakilacustomer.org
9,DOROTHY.TAYLOR,sakilacustomer.org


## 3. Truncating and padding string data

#### 3.1. Removing whitespace from a string.

**`TRIM`** function, syntax:

            TRIM([leading /or/ trailing /or/ both] [character] FROM [string])
where
> the 1st params: specify the method to remove, we have 3 options:
> 
> the 2nd params: specify characters to remove in the function
>
> the last params: specify the string to process / remove the characters. 

For example, when typing

                    SELECT TRIM('    padded     ') AS trim,    -- both : remove white_spaces in the left and right of word/characters
                           LTRIM('    padded     ') AS l_trim, -- leading : remove white_spaces in the left
                           RTRIM('    padded     ') AS t_trim  -- trailing
then the `query result` will be
    
                    +------+------------+-----------+
                    | trim | l_trim     | r_trim    |
                    +------+------------+-----------+
                    |padded| padded     |    padded |
                    +------+------------+-----------+
Another example,

In [24]:
pd.read_sql(
    """
        SELECT TRIM('!#$@   ' FROM '   @   @!! hello_python_and_SQL#    ') AS trim_1,
               TRIM('   !#$@   ' FROM '   @   @!! *hello_python_and_SQL#    ') AS trim_2,
               TRIM('!#$@   ' FROM '   @*   @!! hello_python_and_SQL#*@    ') AS trim_3
    """, con = engine)

Unnamed: 0,trim_1,trim_2,trim_3
0,hello_python_and_SQL,*hello_python_and_SQL,* @!! hello_python_and_SQL#*


#### 3.2. Padding string with character data.

These functions are **`LPAD`** and **`RPAD`**, so how it works??

In [25]:
pd.read_sql(
    """ 
        SELECT LPAD('padding', 10) AS lpad_10,
               LPAD('padding', 5) AS lpad_5,    
               LPAD('padding', 10, '$') AS lpad_10_added,
               RPAD('padding', 5) AS rpad_5,
               RPAD('padding', 10, '#') AS rpad_10,
               RPAD('padding', 5, '!') AS rpad_5_add
    """, con = engine)

Unnamed: 0,lpad_10,lpad_5,lpad_10_added,rpad_5,rpad_10,rpad_5_added
0,padding,paddi,$$$padding,paddi,padding###,paddi


So, we can see that,

> `LPAD(text, pad_integer = 10, added_character = none)` extract 10 characters from the left of the word `padding` and its length is 7, so the first one is non-changed! Likewise, for the `pad_integer = 5` then we receive the first 5 characters from the left : `paddi`. When we have `add = '$'` then the output will appears the number of `added_character`; this number is defined by 

                        max(0, pad_integer - length(text))
>
>  `RPAD(text, pad_integer = 10, added_character = none)` do the samething but when `added_character IS not NULL` then they add the characters to the right of the given_text.

### EXERCISEs

#### Exercise 3.1. Padding
Padding strings is useful in many real-world situations. Earlier in this course, we learned about string concatenation and how to combine the customer's first and last name separated by a single blank space and also combined the customer's full name with their email address.

The padding functions that we learned about in the video are an alternative approach to do this task. To use this approach, you will need to combine and nest functions to determine the length of a string to produce the desired result. Remember when calculating the length of a string you often need to adjust the integer returned to get the proper length or position of a string.

Let's revisit the string concatenation exercise but use padding functions.

#### Instructions 
**Step 1.** Add a single space to the `end or right` of the `first_name` column using a padding function.

Use the `||` operator to concatenate the padded `first_name` to the `last_name` column.

In [26]:
pd.read_sql(
    """ 
    SELECT RPAD(first_name, LENGTH(first_name)+1) || last_name AS full_name
    FROM customer
    LIMIT 29
    """, con = engine)

Unnamed: 0,full_name
0,MARY SMITH
1,PATRICIA JOHNSON
2,LINDA WILLIAMS
3,BARBARA JONES
4,ELIZABETH BROWN
5,JENNIFER DAVIS
6,MARIA MILLER
7,SUSAN WILSON
8,MARGARET MOORE
9,DOROTHY TAYLOR


**Step 2.** Now add a single space to the left or beginning of the `last_name` column using a different padding function than the first step.

Use the `||` operator to concatenate the `first_name` column to the padded `last_name`.

In [27]:
pd.read_sql(
    """
        SELECT first_name || LPAD(last_name, LENGTH(last_name)+1) AS full_name
        FROM customer
        LIMIT 29
    """, con = engine)

Unnamed: 0,full_name
0,MARY SMITH
1,PATRICIA JOHNSON
2,LINDA WILLIAMS
3,BARBARA JONES
4,ELIZABETH BROWN
5,JENNIFER DAVIS
6,MARIA MILLER
7,SUSAN WILSON
8,MARGARET MOORE
9,DOROTHY TAYLOR


**Step 3.** Add a `single space` to the `right or end` of the `first_name` column then add the characters `<` to the `right or end` of `last_name` column.

Finally, add the characters `>` to the `right or end` of the `email` column.

In [28]:
pd.read_sql(
    """ 
        SELECT 
            RPAD(first_name, LENGTH(first_name)+1) 
            || RPAD(last_name, LENGTH(last_name)+2, ' <') 
            || RPAD(email, LENGTH(email)+1, '>') AS full_email
        FROM customer
        LIMIT 29
    """, con = engine)

Unnamed: 0,full_email
0,MARY SMITH <MARY.SMITH@sakilacustomer.org>
1,PATRICIA JOHNSON <PATRICIA.JOHNSON@sakilacusto...
2,LINDA WILLIAMS <LINDA.WILLIAMS@sakilacustomer....
3,BARBARA JONES <BARBARA.JONES@sakilacustomer.org>
4,ELIZABETH BROWN <ELIZABETH.BROWN@sakilacustome...
5,JENNIFER DAVIS <JENNIFER.DAVIS@sakilacustomer....
6,MARIA MILLER <MARIA.MILLER@sakilacustomer.org>
7,SUSAN WILSON <SUSAN.WILSON@sakilacustomer.org>
8,MARGARET MOORE <MARGARET.MOORE@sakilacustomer....
9,DOROTHY TAYLOR <DOROTHY.TAYLOR@sakilacustomer....


#### Exercise 3.2. The TRIM function
In this exercise, we are going to revisit and combine a couple of exercises from earlier in this chapter. If you recall, you used the `LEFT()` function to truncate the description column to 50 characters but saw that some words were cut off and/or had `trailing whitespace`. 

We can use trimming functions ***to eliminate the `whitespace` at the end of the string after it's been truncated***.

#### Instructions
Convert the `film category name` to `uppercase` and use the `CONCAT()` concatenate it with the `title`.

`Truncate` the description to the first 50 characters and ***make sure there is no leading or trailing `whitespace` after truncating***.


In [29]:
pd.read_sql(
    """ 
        SELECT CONCAT(UPPER(c.name), ': ', f.title) AS film_category, 
        TRIM(LEFT(f.description, 50)) AS film_desc -- Truncate the description remove trailing whitespace
        FROM film AS f 
            INNER JOIN film_category AS fc 
                ON f.film_id = fc.film_id 
            INNER JOIN category AS c 
                ON fc.category_id = c.category_id
         LIMIT 39
    """, con = engine)

Unnamed: 0,film_category,film_desc
0,DOCUMENTARY: BEACH HEARTBREAKERS,A Fateful Display of a Womanizer And a Mad Sci...
1,CLASSICS: BEAST HUNCHBACK,A Awe-Inspiring Epistle of a Student And a Squ...
2,FAMILY: BEDAZZLED MARRIED,A Astounding Character Study of a Madman And a...
3,HORROR: BEHAVIOR RUNAWAY,A Unbelieveable Drama of a Student And a Husba...
4,CHILDREN: BETRAYED REAR,A Emotional Character Study of a Boat And a Pi...
5,FAMILY: BILKO ANONYMOUS,A Emotional Reflection of a Teacher And a Man who
6,MUSIC: BIRDCAGE CASPER,A Fast-Paced Saga of a Frisbee And a Astronaut...
7,FAMILY: BLUES INSTINCT,A Insightful Documentary of a Boat And a Composer
8,ANIMATION: BORROWERS BEDAZZLED,A Brilliant Epistle of a Teacher And a Sumo Wr...
9,SPORTS: BUBBLE GROSSE,A Awe-Inspiring Panorama of a Crocodile And a ...


#### Exercise 3.3. Putting it all together
In this exercise, we are going to use the film and category tables to create a new field called `film_category` by concatenating the category name with the film's title. You will also practice how to truncate text fields like the film table's description column without cutting off a word.

To accomplish this we will use the `REVERSE()` function to help determine the position of the last `whitespace character` in the description before we reach 50 characters. This technique can be used to determine the position of the last character that you want to truncate and ensure that it is less than or equal to 50 characters `AND` does not cut off a word.

This is an advanced technique but I know you can do it! Let's dive in.

#### Instructions
Get the first 50 characters of the `description` column

Determine the position of the last whitespace character of the truncated `description` column and subtract it from the number 50 as the second parameter in the first function above.

In [30]:
pd.read_sql(
    """ 
        SELECT UPPER(c.name) || ': ' || f.title AS film_category, 
              -- Truncate the description without cutting off a word with function LEFT
              -- Then, subtract the position of the first whitespace character with POSITION
              LEFT(description, 50 - POSITION(' ' IN REVERSE(LEFT(description, 50)))) 
        FROM 
          film AS f 
          INNER JOIN film_category AS fc 
            ON f.film_id = fc.film_id 
          INNER JOIN category AS c 
            ON fc.category_id = c.category_id;    
    """, con = engine)

Unnamed: 0,film_category,left
0,DOCUMENTARY: BEACH HEARTBREAKERS,A Fateful Display of a Womanizer And a Mad
1,CLASSICS: BEAST HUNCHBACK,A Awe-Inspiring Epistle of a Student And a
2,FAMILY: BEDAZZLED MARRIED,A Astounding Character Study of a Madman And a
3,HORROR: BEHAVIOR RUNAWAY,A Unbelieveable Drama of a Student And a Husband
4,CHILDREN: BETRAYED REAR,A Emotional Character Study of a Boat And a
5,FAMILY: BILKO ANONYMOUS,A Emotional Reflection of a Teacher And a Man who
6,MUSIC: BIRDCAGE CASPER,A Fast-Paced Saga of a Frisbee And a Astronaut
7,FAMILY: BLUES INSTINCT,A Insightful Documentary of a Boat And a Composer
8,ANIMATION: BORROWERS BEDAZZLED,A Brilliant Epistle of a Teacher And a Sumo
9,SPORTS: BUBBLE GROSSE,A Awe-Inspiring Panorama of a Crocodile And a
