<a href="https://colab.research.google.com/github/ThivyaTS/demo/blob/main/Cert_Generator_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cert Generator

## Goal of this project
  1. `Do simple Data Analysis`
    - Deal with missing values
    - Format the date or text
  2. `Enhance Python Concepts`
  3. `Explore more 3rd party libraries - reportlab`



---


# 1. Downloading & Importing Packages for this Project



---



In [None]:
# install external package 'reportlab' because it is not part of normal python
# reportlab is a library that links pdf to python program

# 'pip' - python package downloader ( package manager )
!pip install reportlab

Collecting reportlab
  Downloading reportlab-4.2.5-py3-none-any.whl.metadata (1.5 kB)
Downloading reportlab-4.2.5-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.2.5


In [None]:
# Mount Google Drive at /content/drive (Colab only) so that the dataset, fonts and
# certificate template stored under /content/drive/MyDrive can be read below
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

import numpy as np
import pandas as pd

from reportlab.lib.pagesizes import landscape, A4
from reportlab.pdfgen import canvas
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont



---

# 2. Reading & Exploring the Excel File



---



In [None]:
# Read the participants spreadsheet from the mounted Drive into a DataFrame
dataset_path = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/dataset.xlsx"
df = pd.read_excel(dataset_path)

In [None]:
# Display the data exactly as loaded, before any cleaning
df

Unnamed: 0,Name,Course,CourseLevel,Date
0,Christy Cunningham,Python,Beginner,2023-09-10
1,Douglas Tucker,PYTHON,MASTER,2023-09-11
2,Travis Walters,Java,Intermediate,2023-09-12
3,Nathaniel Harris,Web Development,Advanced,2023-09-13
4,-,,Advanced,NaT
5,Tonya Carter,AI & Machine Learning,Beginner,2023-09-14
6,Erik Smith,Mobile Development,Beginner,2023-09-15
7,Kristopher Johnson,Python,Beginner,2023-09-16
8,Jonathan Bucker,,,NaT
9,Robert Buck,PYTHON,Master,2023-09-17


# 3. Data Cleaning (Data Analysis)

- Deal with missing values
- Format the date & text

In [None]:
# the 3 problems
# 1. Inconsistent formatting in "Course" & 'CourseLevel' columns
# 2. Date Format (yyyy/mm/dd) --> we want to change it to (dd/mm/yyyy)
# 3. Empty Rows

In [None]:
# Problem 3 : Empty Rows
# Keep only the rows in which every column has a value
# axis=0   --> work on rows
# how="any" --> drop a row as soon as ONE of its columns is empty
df = df.dropna(axis=0, how="any")

In [None]:
df # the row count went from 13 to 11

Unnamed: 0,Name,Course,CourseLevel,Date
0,Christy Cunningham,Python,Beginner,2023-09-10
1,Douglas Tucker,PYTHON,MASTER,2023-09-11
2,Travis Walters,Java,Intermediate,2023-09-12
3,Nathaniel Harris,Web Development,Advanced,2023-09-13
5,Tonya Carter,AI & Machine Learning,Beginner,2023-09-14
6,Erik Smith,Mobile Development,Beginner,2023-09-15
7,Kristopher Johnson,Python,Beginner,2023-09-16
9,Robert Buck,PYTHON,Master,2023-09-17
10,Joseph Mcdonald,Java,Intermediate,2023-09-18
11,Jerome Abbott,Web Development,Advanced,2023-09-19


In [None]:
# Problem 2 : Date Format (yyyy/mm/dd) --> (dd/mm/yyyy)

# Look at the 'Date' column on its own
df['Date']

Unnamed: 0,Date
0,2023-09-10
1,2023-09-11
2,2023-09-12
3,2023-09-13
5,2023-09-14
6,2023-09-15
7,2023-09-16
9,2023-09-17
10,2023-09-18
11,2023-09-19


In [None]:
# iloc --> integer location (select by position) ; iloc[0] is the first row
df['Date'].iloc[0]

# Timestamp is the data type pandas uses to represent a point in time
#  we need to change Timestamp('2023-09-10 00:00:00') --> 10/09/2023

Timestamp('2023-09-10 00:00:00')

In [None]:
# .assign() returns a NEW DataFrame that contains the extra column.
# Writing df['FormattedDate'] = ... directly on the frame returned by dropna()
# raises pandas' SettingWithCopyWarning ; .assign() avoids it.
df = df.assign(FormattedDate = df["Date"].dt.strftime("%d/%m/%Y"))

# in order to format dates , you can access 'dt' in Pandas
# dt - datetime (gives access to many date-related commands for a whole column)
# one of the commands in 'dt' is --> strftime
# strftime (string formatted time) --> converts each Timestamp into a string that follows the given format

# %d --> 10   (day of the month)
# %m --> 09   (month as a number)
# %Y --> 2023
# %y --> 23

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['FormattedDate'] = df["Date"].dt.strftime("%d/%m/%Y")


In [None]:
df['FormattedDate'].dtype

# 'O' --> object --> the dtype pandas reports for a column of Python strings

dtype('O')

In [None]:
# 'Date' (Timestamp) and 'FormattedDate' (string) now sit side by side
df

Unnamed: 0,Name,Course,CourseLevel,Date,FormattedDate
0,Christy Cunningham,Python,Beginner,2023-09-10,10/09/2023
1,Douglas Tucker,PYTHON,MASTER,2023-09-11,11/09/2023
2,Travis Walters,Java,Intermediate,2023-09-12,12/09/2023
3,Nathaniel Harris,Web Development,Advanced,2023-09-13,13/09/2023
5,Tonya Carter,AI & Machine Learning,Beginner,2023-09-14,14/09/2023
6,Erik Smith,Mobile Development,Beginner,2023-09-15,15/09/2023
7,Kristopher Johnson,Python,Beginner,2023-09-16,16/09/2023
9,Robert Buck,PYTHON,Master,2023-09-17,17/09/2023
10,Joseph Mcdonald,Java,Intermediate,2023-09-18,18/09/2023
11,Jerome Abbott,Web Development,Advanced,2023-09-19,19/09/2023


In [None]:
# The original Timestamp column is no longer needed ; only 'FormattedDate' is kept
df = df.drop(columns = ["Date"])

In [None]:
# Preview the first 5 rows to confirm the 'Date' column is gone
df.head()

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/2023
1,Douglas Tucker,PYTHON,MASTER,11/09/2023
2,Travis Walters,Java,Intermediate,12/09/2023
3,Nathaniel Harris,Web Development,Advanced,13/09/2023
5,Tonya Carter,AI & Machine Learning,Beginner,14/09/2023


In [None]:
# Problem 1 : Inconsistent formatting in 'Course' & 'CourseLevel'

# .str.strip()      --> removes spaces at the start / end of the text ("Master " --> "Master")
# .str.capitalize() --> first letter in UPPERCASE , every other letter in lowercase

df['Course'] = df['Course'].str.strip().str.capitalize()
df['CourseLevel'] = df['CourseLevel'].str.strip().str.capitalize()

In [None]:
# Check the result : 'Course' and 'CourseLevel' now use one consistent capitalisation
df

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/2023
1,Douglas Tucker,Python,Master,11/09/2023
2,Travis Walters,Java,Intermediate,12/09/2023
3,Nathaniel Harris,Web development,Advanced,13/09/2023
5,Tonya Carter,Ai & machine learning,Beginner,14/09/2023
6,Erik Smith,Mobile development,Beginner,15/09/2023
7,Kristopher Johnson,Python,Beginner,16/09/2023
9,Robert Buck,Python,Master,17/09/2023
10,Joseph Mcdonald,Java,Intermediate,18/09/2023
11,Jerome Abbott,Web development,Advanced,19/09/2023


# 4. Registering Fonts for our Project

In [None]:
fonts_path = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/fonts"
# storing the path of the fonts folder inside a string variable (the font file names are appended to it later)

In [None]:
# Register the 2 fonts used by this project

# pdfmetrics is one of the packages we imported ; it registers fonts so they can be used in PDF files
# .ttf is the file extension of the font files

# pdfmetrics.registerFont() takes a TTFont object
# TTFont() --> needs 2 inputs
# 1st input --> the name you will use to refer to the font
# 2nd input --> where the font file is stored

for font_name , font_file in (('Lora-Bold' , "/Lora-Bold.ttf") , ('Lora-Regular' , "/Lora-Regular.ttf")):
  pdfmetrics.registerFont(TTFont(font_name , fonts_path + font_file))

In [None]:
# after registering , we can use these fonts in this project later!

# 5. Creating Certificate Generator Logic Function





---

## Version 1 of the Function


In [None]:
def certificate_generator(name , courseName , courseLevel , date):
  """Version 1: create a landscape A4 PDF that contains only the certificate template image.

  Parameters:
    name , courseName , courseLevel : strings joined with '-' to build the PDF file name
    date : not used in this version (kept so the signature matches the later versions)

  Returns nothing ; the PDF is written into the 'certificates' folder on the mounted Drive.
  """

  # storing the generated file name inside a variable "pdf_file_name"
  # e.g. Jerome Abbott-Web development-Advanced.pdf
  pdf_file_name = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificates/" + name + '-' + courseName + '-' + courseLevel + '.pdf'

  # A4 is a (width , height) pair for a PORTRAIT page , measured in points
  # landscape(A4) swaps the pair so that the longer side becomes the width
  page_width , page_height = landscape(A4)

  # Canvas() needs 2 inputs
  # 1st input --> where are you storing it ? and the file name
  # 2nd input --> what size ?
  c = canvas.Canvas(pdf_file_name , pagesize = (page_width , page_height))

  # the canvas has this drawImage() command , which needs 5 inputs
  # 1st input --> image --> certificate template
  # 2nd input --> x axis (coordinate 0)
  # 3rd input --> y axis (coordinate 0)
  # 4th input --> width  --> full width of the landscape page
  # 5th input --> height --> full height of the landscape page

  # 2nd & 3rd input --> (x,y) --> (0,0) --> in reportlab (0,0) is the BOTTOM left corner of the page ,
  #                     so the image starts in that corner
  # 4th & 5th input --> width & height --> stretch the image over the whole page

  template = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificate_template.jpg"

  # drawing our cert template into our canvas
  c.drawImage(template , 0 , 0 , page_width , page_height)

  # save() writes the finished PDF file
  c.save()

In [None]:
# Try Version 1 with sample values : the PDF should contain only the template image
certificate_generator("Thivya" , "Python" , "Beginner" , "13/10/2024")

## Version 2 of the function

In [None]:
def certificate_generator(name , courseName , courseLevel , date):
  """Version 2: draw the template and print the page size and its centre point.

  Parameters:
    name , courseName , courseLevel : strings joined with '-' to build the PDF file name
    date : not used in this version (kept so the signature matches the later versions)

  Returns nothing ; the PDF is written into the 'certificates' folder on the mounted Drive.
  """

  pdf_file_name = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificates/" + name + '-' + courseName + '-' + courseLevel + '.pdf'

  # landscape(A4) --> (width , height) of the page in points , the width being the longer side
  # (we read the size from here instead of the canvas' private '_pagesize' attribute)
  page_width , page_height = landscape(A4)

  c = canvas.Canvas(pdf_file_name , pagesize = (page_width , page_height))
  template = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificate_template.jpg"
  c.drawImage(template , 0 , 0 , page_width , page_height)

  print('---------------')
  # middle of the page (width) --> middle  --> /2
  center_x = page_width / 2

  print("Full width of the A4:  " , page_width)
  print("Center of the width of the A4:  " , center_x)

  print('---------------')
  # middle of the page (height) --> middle  --> /2
  center_y = page_height / 2
  print("Full height of the A4:  " , page_height)
  print("Center of the height of the A4:  " , center_y)

  c.save()

In [None]:
# Try Version 2 with sample values : it prints the page width / height and their centre points
certificate_generator("Thivya" , "Python" , "Beginner" , "13/10/2024")

---------------
Full width of the A4:   841.8897637795277
Center of the width of the A4:   420.94488188976385
---------------
Full height of the A4:   595.2755905511812
Center of the height of the A4:   297.6377952755906


## Version 3 of the Function

In [None]:
def certificate_generator(name , courseName , courseLevel , date):
  """Version 3: draw the template , then write the name , course , date and a Cert-ID on top of it.

  Parameters:
    name        : text drawn as the recipient's name (also part of the file name)
    courseName  : course title (also part of the file name)
    courseLevel : course level (also part of the file name)
    date        : text drawn as the date , exactly as given

  Returns nothing ; the PDF is written into the 'certificates' folder on the mounted Drive.
  The fonts 'Lora-Bold' and 'Lora-Regular' must already be registered.
  """

  pdf_file_name = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificates/" + name + '-' + courseName + '-' + courseLevel + '.pdf'

  # landscape(A4) --> (width , height) of the page in points , the width being the longer side
  # (we read the size from here instead of the canvas' private '_pagesize' attribute)
  page_width , page_height = landscape(A4)

  c = canvas.Canvas(pdf_file_name , pagesize = (page_width , page_height))
  template = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificate_template.jpg"
  c.drawImage(template , 0 , 0 , page_width , page_height)

  center_x = page_width / 2
  center_y = page_height / 2

  #-------------------------------
  # .setFont() --> needs 2 inputs

  # 1st input --> font name (must be a registered font)
  # 2nd input --> font size (in points)
  #-------------------------------
  #-------------------------------
  # drawCentredString() --> needs 3 inputs
  # 1st input --> x axis (the text is centred on this x)
  # 2nd input --> y axis
  # 3rd input --> what you are drawing (text) on the canvas
  #-------------------------------
  # NOTE: the offsets below (- 46 , - 105 , + 190 ...) presumably line the text up
  #       with the blank areas of the template image -- adjust them if the template changes

  # 1. Name
  c.setFont('Lora-Bold' , 30)
  c.drawCentredString(center_x , center_y - 46, name)

  # 2. CourseName & CourseLevel
  c.setFont('Lora-Bold' , 28)
  c.drawCentredString(center_x , center_y - 105, courseName + "-" + courseLevel)

  # 3. Date
  c.setFont('Lora-Bold' , 17)
  c.drawCentredString(center_x + 190 , center_y - 160, date)

  # 4. Cert ID --> the current Unix timestamp with the '.' removed
  cert_id = "Cert-ID: " + str(pd.Timestamp.now().timestamp()).replace('.',"")
  c.setFont('Lora-Regular' , 12)
  c.drawCentredString(center_x + 266 , center_y - 230, cert_id)

  c.save()

In [None]:
# Try Version 3 with sample values : the PDF should now show the name , course , date and Cert-ID
certificate_generator("Thivya" , "Python" , "Beginner" , "13/10/2024")

In [None]:
# For Cert ID , it has to be unique , we can generate an ID based on the current time (using pd.Timestamp)
# NOTE: the ID is only unique if no two certificates are created within the same microsecond ,
#       and str() of a float drops trailing zeros , so the number of digits can vary
print(pd.Timestamp.now())
print(pd.Timestamp.now().timestamp()) # Unix timestamp (number of seconds since January 1 1970)
print(str(pd.Timestamp.now().timestamp()).replace('.',""))

2024-10-21 12:46:51.537061
1729514811.537719
1729514811537944


## Version 4 of the Function

In [None]:
def certificate_generator(name , courseName , courseLevel , date ,
                          output_dir = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificates" ,
                          template = "/content/drive/MyDrive/1.1 Python Certificate Generator Project/certificate_template.jpg"):
  """Version 4 (final): create one certificate PDF for one participant.

  Draws the template over a landscape A4 page , then writes the name , the course ,
  the date and a Cert-ID on top of it.

  Parameters:
    name        : text drawn as the recipient's name (also part of the file name)
    courseName  : course title (also part of the file name)
    courseLevel : course level (also part of the file name)
    date        : text drawn as the date , exactly as given
    output_dir  : folder that receives the PDF ; it is created when it does not exist
    template    : path of the certificate background image

  Returns nothing ; the file '<name>-<courseName>-<courseLevel>.pdf' is written into output_dir.
  The fonts 'Lora-Bold' and 'Lora-Regular' must already be registered.
  """

  # '/' and '\' inside a file name would be read as folder separators , and stray
  # spaces around a value would end up in the file name --> clean each part first
  file_name_parts = [part.strip().replace('/' , '_').replace('\\' , '_') for part in (name , courseName , courseLevel)]

  # without the folder , saving the PDF fails
  os.makedirs(output_dir , exist_ok = True)
  pdf_file_name = os.path.join(output_dir , '-'.join(file_name_parts) + '.pdf')

  # landscape(A4) --> (width , height) of the page in points , the width being the longer side
  # (we read the size from here instead of the canvas' private '_pagesize' attribute)
  page_width , page_height = landscape(A4)

  c = canvas.Canvas(pdf_file_name , pagesize = (page_width , page_height))
  c.drawImage(template , 0 , 0 , page_width , page_height)

  center_x = page_width / 2
  center_y = page_height / 2

  # NOTE: the offsets below (- 46 , - 105 , + 190 ...) presumably line the text up
  #       with the blank areas of the template image -- adjust them if the template changes

  # 1. Name
  c.setFont('Lora-Bold' , 30)
  c.drawCentredString(center_x , center_y - 46, name)

  # 2. CourseName & CourseLevel
  c.setFont('Lora-Bold' , 28)
  c.drawCentredString(center_x , center_y - 105, courseName + "-" + courseLevel)

  # 3. Date
  c.setFont('Lora-Bold' , 17)
  c.drawCentredString(center_x + 190 , center_y - 160, date)

  # 4. Cert ID
  # Timestamp.value is the time in nanoseconds as a whole number ; // 1000 turns it into microseconds.
  # This gives the same digits as str(timestamp()).replace('.' , "") , but str() of a float
  # drops trailing zeros , so the old IDs did not always have the same length.
  # The ID is still time-based : it is only unique if no two certificates are created in the same microsecond.
  cert_id = "Cert-ID: " + str(pd.Timestamp.now().value // 1000)
  c.setFont('Lora-Regular' , 12)
  c.drawCentredString(center_x + 266 , center_y - 230, cert_id)

  c.save()

In [None]:
# The cleaned data : every row below becomes one certificate
df

Unnamed: 0,Name,Course,CourseLevel,FormattedDate
0,Christy Cunningham,Python,Beginner,10/09/2023
1,Douglas Tucker,Python,Master,11/09/2023
2,Travis Walters,Java,Intermediate,12/09/2023
3,Nathaniel Harris,Web development,Advanced,13/09/2023
5,Tonya Carter,Ai & machine learning,Beginner,14/09/2023
6,Erik Smith,Mobile development,Beginner,15/09/2023
7,Kristopher Johnson,Python,Beginner,16/09/2023
9,Robert Buck,Python,Master,17/09/2023
10,Joseph Mcdonald,Java,Intermediate,18/09/2023
11,Jerome Abbott,Web development,Advanced,19/09/2023


In [None]:
# a for loop over a list gives you the items one by one
for x in ["Apple" , "Banana" , "Durian"]:
  print(x)

Apple
Banana
Durian


In [None]:
# enumerate() --> gives you (index , item) pairs , so you can keep track of the position while looping

for index , x in enumerate(["Apple" , "Banana" , "Durian"]):
  print(str(index) + " " + x)

0 Apple
1 Banana
2 Durian


In [None]:
# looping directly over a DataFrame gives you the COLUMN NAMES , not the rows
for x in df:
  print(x)

Name
Course
CourseLevel
FormattedDate


In [None]:
# df.iterrows() gives you something loopable (array-like)
# each step of the loop gives you 2 items (index , row) ; the row is a pandas Series

for index , x in df.iterrows():
  print(x)
  print('-' * 30)

Name             Christy Cunningham
Course                       Python
CourseLevel                Beginner
FormattedDate            10/09/2023
Name: 0, dtype: object
------------------------------
Name             Douglas Tucker
Course                   Python
CourseLevel             Master 
FormattedDate        11/09/2023
Name: 1, dtype: object
------------------------------
Name             Travis Walters
Course                     Java
CourseLevel        Intermediate
FormattedDate        12/09/2023
Name: 2, dtype: object
------------------------------
Name             Nathaniel Harris
Course            Web development
CourseLevel              Advanced
FormattedDate          13/09/2023
Name: 3, dtype: object
------------------------------
Name                      Tonya Carter
Course           Ai & machine learning
CourseLevel                   Beginner
FormattedDate               14/09/2023
Name: 5, dtype: object
------------------------------
Name                     Erik Smith
Co

In [None]:
# Generate one certificate for every row of the cleaned data and count them
# itertuples() gives each row as a named tuple --> columns are read as row.Name , row.Course , ...
count = 0
for row in df.itertuples(index = False):
  certificate_generator(row.Name , row.Course , row.CourseLevel , row.FormattedDate)
  count += 1

# count should be equal to the number of rows in df
print(str(count) + " of " + str(len(df)) + " certificates generated successfully")

11=11  certificates generated succesfully
